First, we input the data ETF by function “read.table”. Then using “view” to display the data. We name the data as “data” for short.
data <- read.table('~/data/dataquiz.txt', header= TRUE)
head(data)
## 證券代碼 簡稱 年月日 收盤價.元.
## 1 50 ????50 20100104 37.5723
## 2 52 ???? 20100104 23.1720
## 3 56 ????? 20100104 11.9283
## 4 50 ????50 20100105 37.5723
## 5 52 ???? 20100105 23.2037
## 6 56 ????? 20100105 11.8775
Then, we clean the raw data:
data <- data[,-2]
as.data.table(data)
## 證券代碼 年月日 收盤價.元.
## 1: 50 20100104 37.5723
## 2: 52 20100104 23.1720
## 3: 56 20100104 11.9283
## 4: 50 20100105 37.5723
## 5: 52 20100105 23.2037
## ---
## 8858: 52 20211229 122.8468
## 8859: 56 20211229 30.8785
## 8860: 50 20211230 140.1584
## 8861: 52 20211230 122.8010
## 8862: 56 20211230 30.8510
colnames(data) <- c('id','date','close')
data1 <- reshape2::dcast(data,date~id)
## Using close as value column: use value.var to override.
str(data)
## 'data.frame': 8862 obs. of 3 variables:
## $ id : int 50 52 56 50 52 56 50 52 56 50 ...
## $ date : int 20100104 20100104 20100104 20100105 20100105 20100105 20100106 20100106 20100106 20100107 ...
## $ close: num 37.6 23.2 11.9 37.6 23.2 ...
str(data1)
## 'data.frame': 2954 obs. of 4 variables:
## $ date: int 20100104 20100105 20100106 20100107 20100108 20100111 20100112 20100113 20100114 20100115 ...
## $ 50 : num 37.6 37.6 38.3 38.2 38.4 ...
## $ 52 : num 23.2 23.2 23.7 23.5 23.4 ...
## $ 56 : num 11.9 11.9 12.1 12 12.1 ...
dat <- data1 %>% mutate(year= as.numeric(substr(date,1,4))) %>% filter(year>= 2010 & year <=2020)
apply(is.na(dat),2,sum)
## date 50 52 56 year
## 0 0 0 0 0
str(dat)
## 'data.frame': 2710 obs. of 5 variables:
## $ date: int 20100104 20100105 20100106 20100107 20100108 20100111 20100112 20100113 20100114 20100115 ...
## $ 50 : num 37.6 37.6 38.3 38.2 38.4 ...
## $ 52 : num 23.2 23.2 23.7 23.5 23.4 ...
## $ 56 : num 11.9 11.9 12.1 12 12.1 ...
## $ year: num 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
etf.xts <- xts(dat[,-1],order.by= as.Date(as.character(dat$date),format='%Y%m%d'))
Finally:
final <- etf.xts[,-4]
head(final,6)
## 50 52 56
## 2010-01-04 37.5723 23.1720 11.9283
## 2010-01-05 37.5723 23.2037 11.8775
## 2010-01-06 38.2705 23.7433 12.0552
## 2010-01-07 38.1708 23.4894 11.9537
## 2010-01-08 38.4035 23.4195 12.0805
## 2010-01-11 38.5033 23.5529 12.2074
etf.ret <- Return.calculate(etf.xts,method ='log') %>% na.omit()
etf.w <- etf.xts%>% to.weekly(indexAt="lastof", OHLC= FALSE)
## Warning in !missing(sec) && sec%%1 != 0: 'length(x) = 567 > 1' in coercion to
## 'logical(1)'
etf.w <- Return.calculate(etf.w,method ='log') %>% na.omit()
etf.m <- etf.xts%>% to.monthly(indexAt="lastof", OHLC= FALSE)
etf.m <- Return.calculate(etf.m,method ='log') %>% na.omit()
The daily result is:
head(etf.ret,6)
## 50 52 56 year
## 2010-01-05 0.000000000 0.001367095 -0.004267874 0
## 2010-01-06 0.018412288 0.022988637 0.014850248 0
## 2010-01-07 -0.002608539 -0.010751130 -0.008455248 0
## 2010-01-08 0.006077776 -0.002980247 0.010551728 0
## 2010-01-11 0.002595351 0.005679946 0.010449743 0
## 2010-01-12 -0.005194844 -0.002699700 -0.005412989 0
The weekly result is:
head(etf.w,6)
## 50 52 56 year
## 2010-01-15 0.0008667327 0.01906600 0.013361763 0
## 2010-01-22 -0.0606249470 -0.06880198 -0.045374050 0
## 2010-01-29 -0.0260723863 -0.03184367 -0.030847250 0
## 2010-02-06 -0.0345525810 -0.04974662 -0.046237561 0
## 2010-02-10 0.0174255182 0.03493264 0.028181391 0
## 2010-02-26 -0.0106142584 -0.01050288 -0.002282367 0
The monthly result is:
head(etf.m,6)
## 50 52 56 year
## 2010-02-28 -0.02774132 -0.025316858 -0.020338537 0
## 2010-03-31 0.05105114 0.050871494 0.040280291 0
## 2010-04-30 0.01008864 0.007428255 0.023408313 0
## 2010-05-31 -0.08966970 -0.087928688 -0.065056926 0
## 2010-06-30 -0.01103730 -0.029560222 0.001368322 0
## 2010-07-31 0.08414719 0.078194680 0.087394382 0