rm(list=ls())
etf4<-read.table("ETF4_2000_2018_d.txt", fileEncoding = "UTF-8-BOM")
head(etf4)
## V1 V2 V3 V4 V5
## 1 證券代碼 簡稱 日期 未調整收盤價(元) 當日均價(元)
## 2 0050 元大台灣50 20090105 34.20 34.30
## 3 0056 元大高股息 20090105 13.92 14.00
## 4 0050 元大台灣50 20090106 34.18 34.21
## 5 0056 元大高股息 20090106 14.04 14.02
## 6 0050 元大台灣50 20090107 34.63 34.59
etf4<-read.table("ETF4_2000_2018_d_ansi.txt", header = T)
str(etf4)
## 'data.frame': 7493 obs. of 5 variables:
## $ 證券代碼 : int 50 56 50 56 50 56 50 56 50 56 ...
## $ 簡稱 : Factor w/ 4 levels "元大S&P500","元大台灣50",..: 2 3 2 3 2 3 2 3 2 3 ...
## $ 日期 : int 20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
## $ 未調整收盤價.元.: num 34.2 13.9 34.2 14 34.6 ...
## $ 當日均價.元. : num 34.3 14 34.2 14 34.6 ...
etf4<-read.table("ETF4_2000_2018_d_ansi.txt", header = T, stringsAsFactors = T,
colClasses = c("證券代碼"="character"))
str(etf4)
## 'data.frame': 7493 obs. of 5 variables:
## $ 證券代碼 : chr "0050" "0056" "0050" "0056" ...
## $ 簡稱 : Factor w/ 4 levels "元大S&P500","元大台灣50",..: 2 3 2 3 2 3 2 3 2 3 ...
## $ 日期 : int 20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
## $ 未調整收盤價.元.: num 34.2 13.9 34.2 14 34.6 ...
## $ 當日均價.元. : num 34.3 14 34.2 14 34.6 ...
head(etf4)
## 證券代碼 簡稱 日期 未調整收盤價.元. 當日均價.元.
## 1 0050 元大台灣50 20090105 34.20 34.30
## 2 0056 元大高股息 20090105 13.92 14.00
## 3 0050 元大台灣50 20090106 34.18 34.21
## 4 0056 元大高股息 20090106 14.04 14.02
## 5 0050 元大台灣50 20090107 34.63 34.59
## 6 0056 元大高股息 20090107 14.28 14.28
rm=remove
ls()=是設定為空的
rm(list=ls())=可以把所有的資料清除
txt.=文字檔 (read.table.)
header=T 是要有表投的意思
stringsAsFactors 字串變成文字分類
str=structure
etf4<-read.csv("ETF4_2000_2018_d.csv", colClasses = c("證券代碼"="character"))
str(etf4)
## 'data.frame': 7493 obs. of 5 variables:
## $ 證券代碼 : chr "0050 " "0056 " "0050 " "0056 " ...
## $ 簡稱 : Factor w/ 4 levels "元大S&P500 ",..: 2 3 2 3 2 3 2 3 2 3 ...
## $ 日期 : int 20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
## $ 未調整收盤價.元.: num 34.2 13.9 34.2 14 34.6 ...
## $ 當日均價.元. : num 34.3 14 34.2 14 34.6 ...
etf4.csv<-read.csv("ETF4_2000_2018_d.csv", fileEncoding='big5',
colClasses=c('factor', 'factor', 'factor', 'numeric', 'numeric'))
head(etf4.csv)
## 證券代碼 簡稱 日期 未調整收盤價.元. 當日均價.元.
## 1 0050 元大台灣50 20090105 34.20 34.30
## 2 0056 元大高股息 20090105 13.92 14.00
## 3 0050 元大台灣50 20090106 34.18 34.21
## 4 0056 元大高股息 20090106 14.04 14.02
## 5 0050 元大台灣50 20090107 34.63 34.59
## 6 0056 元大高股息 20090107 14.28 14.28
str(etf4.csv)
## 'data.frame': 7493 obs. of 5 variables:
## $ 證券代碼 : Factor w/ 4 levels "0050 ","0056 ",..: 1 2 1 2 1 2 1 2 1 2 ...
## $ 簡稱 : Factor w/ 4 levels "元大S&P500 ",..: 2 3 2 3 2 3 2 3 2 3 ...
## $ 日期 : Factor w/ 2474 levels "20090105","20090106",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ 未調整收盤價.元.: num 34.2 13.9 34.2 14 34.6 ...
## $ 當日均價.元. : num 34.3 14 34.2 14 34.6 ...
library(readr)
etf4_csv<-read_csv("ETF4_2000_2018_d.csv")
## Parsed with column specification:
## cols(
## `<c3><U+04A8><e9><U+00A5>N<U+00BD>X` = col_character(),
## `2<U+00BA><d9>` = col_character(),
## `<U+00A4><e9><U+00B4><c1>` = col_double(),
## `<U+00A5><U+00BC><U+00BD><U+057E><U+39AC><U+00BD>L<U+00BB><f9>(<U+00A4><U+00B8>)` = col_double(),
## `<U+00B7><ed><U+00A4><U+99E1><U+00BB><f9>(<U+00A4><U+00B8>)` = col_double()
## )
etf4_csv<-read_csv("ETF4_2000_2018_d.csv", locale = locale(encoding='big5'))
## Parsed with column specification:
## cols(
## 證券代碼 = col_character(),
## 簡稱 = col_character(),
## 日期 = col_double(),
## `未調整收盤價(元)` = col_double(),
## `當日均價(元)` = col_double()
## )
head(etf4_csv)
## # A tibble: 6 x 5
## 證券代碼 簡稱 日期 `未調整收盤價(元)` `當日均價(元)`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 0050 元大台灣50 20090105 34.2 34.3
## 2 0056 元大高股息 20090105 13.9 14
## 3 0050 元大台灣50 20090106 34.2 34.2
## 4 0056 元大高股息 20090106 14.0 14.0
## 5 0050 元大台灣50 20090107 34.6 34.6
## 6 0056 元大高股息 20090107 14.3 14.3
str(etf4_csv)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 7493 obs. of 5 variables:
## $ 證券代碼 : chr "0050" "0056" "0050" "0056" ...
## $ 簡稱 : chr "元大台灣50" "元大高股息" "元大台灣50" "元大高股息" ...
## $ 日期 : num 20090105 20090105 20090106 20090106 20090107 ...
## $ 未調整收盤價(元): num 34.2 13.9 34.2 14 34.6 ...
## $ 當日均價(元) : num 34.3 14 34.2 14 34.6 ...
## - attr(*, "spec")=
## .. cols(
## .. 證券代碼 = col_character(),
## .. 簡稱 = col_character(),
## .. 日期 = col_double(),
## .. `未調整收盤價(元)` = col_double(),
## .. `當日均價(元)` = col_double()
## .. )
head/tail 是看前/後幾筆資料
str=structure
int=整數
colclasses=c 欄位的屬性
col=列
factor(因子)
numeric(數值)
character(文字)
fileEncoding=‘big5’(有亂碼時改編碼方式)
library(readr) 執行才可跑程式
locale 是區域設定
encoding(編碼)
etf4_csv<-read_csv(“ETF4_2000_2018_d.csv”, locale = locale(encoding=‘big5’)) (會有tibble)
library(readxl)
etf4_xls<-read_excel("ETF4_2000_2018_d.xls",
col_types =c("text", "text","text", "numeric","numeric"))
head(etf4_xls)
## # A tibble: 6 x 5
## 證券代碼 簡稱 日期 `未調整收盤價(元)` `當日均價(元)`
## <chr> <chr> <chr> <dbl> <dbl>
## 1 0050 元大台灣50 20090105 34.2 34.3
## 2 0056 元大高股息 20090105 13.9 14
## 3 0050 元大台灣50 20090106 34.2 34.2
## 4 0056 元大高股息 20090106 14.0 14.0
## 5 0050 元大台灣50 20090107 34.6 34.6
## 6 0056 元大高股息 20090107 14.3 14.3
etf4.c<-etf4_csv[, c(-2, -4)]
etf4.c<-etf4.c[-1,]
colnames(etf4.c)<-c("id", "date", "price")
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
etf4.c<-etf4_csv%>%select(c(1,3,5))%>%rename("id" = "證券代碼", "date"= "日期", "price" = "當日均價(元)")
etf4.c
## # A tibble: 7,493 x 3
## id date price
## <chr> <dbl> <dbl>
## 1 0050 20090105 34.3
## 2 0056 20090105 14
## 3 0050 20090106 34.2
## 4 0056 20090106 14.0
## 5 0050 20090107 34.6
## 6 0056 20090107 14.3
## 7 0050 20090108 33.2
## 8 0056 20090108 13.9
## 9 0050 20090109 32.3
## 10 0056 20090109 13.6
## # ... with 7,483 more rows
etf4_xls<-read_excel(“ETF4_2000_2018_d.xls”, col_types =c(“text”, “text”,“text”, “numeric”,“numeric”)) 結果會是date.frame 的形式
library(readxl) 執行才可跑程式
col_types(列的形式)
etf4.c$ (看哪一個欄位)
c(欄,列)的c代表連結
colnames=欄位名稱
pipe operator (簡化成是不必要程式)
library(magrittr)、library(dplyr)執行才可跑程式
etf4_135 = etf4_csv[,(1,3,5)] (看1、3、5欄的資料)
%>% (可省略打變數名稱的時間)
id" = 證券代碼、date= 日期、price" = 當日均價(元)
library(reshape2)
etf4.reorder = dcast(etf4.c, date~id)
## Using price as value column: use value.var to override.
dim(etf4.reorder)
## [1] 2474 5
head(etf4.reorder)
## date 0050 0056 006205 00646
## 1 20090105 34.30 14.00 NA NA
## 2 20090106 34.21 14.02 NA NA
## 3 20090107 34.59 14.28 NA NA
## 4 20090108 33.21 13.86 NA NA
## 5 20090109 32.32 13.61 NA NA
## 6 20090110 31.91 13.55 NA NA
str(etf4.reorder)
## 'data.frame': 2474 obs. of 5 variables:
## $ date : num 20090105 20090106 20090107 20090108 20090109 ...
## $ 0050 : num 34.3 34.2 34.6 33.2 32.3 ...
## $ 0056 : num 14 14 14.3 13.9 13.6 ...
## $ 006205: num NA NA NA NA NA NA NA NA NA NA ...
## $ 00646 : num NA NA NA NA NA NA NA NA NA NA ...
etf4.reorder$date<-as.Date(as.character(etf4.reorder$date), "%Y%m%d")
head(etf4.reorder)
## date 0050 0056 006205 00646
## 1 2009-01-05 34.30 14.00 NA NA
## 2 2009-01-06 34.21 14.02 NA NA
## 3 2009-01-07 34.59 14.28 NA NA
## 4 2009-01-08 33.21 13.86 NA NA
## 5 2009-01-09 32.32 13.61 NA NA
## 6 2009-01-10 31.91 13.55 NA NA
str(etf4.reorder)
## 'data.frame': 2474 obs. of 5 variables:
## $ date : Date, format: "2009-01-05" "2009-01-06" ...
## $ 0050 : num 34.3 34.2 34.6 33.2 32.3 ...
## $ 0056 : num 14 14 14.3 13.9 13.6 ...
## $ 006205: num NA NA NA NA NA NA NA NA NA NA ...
## $ 00646 : num NA NA NA NA NA NA NA NA NA NA ...
library(reshape2) 執行才可跑程式
reorder 為重新整理的意思
etf4.reorder = dcast(etf4.c, date~id) -用日期做一個欄位的分類,做資料重整
etf4.reorder\(date<-as.Date(as.character(etf4.reorder\)date), “%Y%m%d”- 把文字轉成數字再轉成日期
%Y大寫為4個數字(2009)、小寫為2個數字(09)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
etf4.xts<-xts(etf4.reorder[,-1], order.by = etf4.reorder$date)
head(etf4.xts)
## 0050 0056 006205 00646
## 2009-01-05 34.30 14.00 NA NA
## 2009-01-06 34.21 14.02 NA NA
## 2009-01-07 34.59 14.28 NA NA
## 2009-01-08 33.21 13.86 NA NA
## 2009-01-09 32.32 13.61 NA NA
## 2009-01-10 31.91 13.55 NA NA
tail(etf4.xts)
## 0050 0056 006205 00646
## 2018-12-22 74.75 24.15 25.08 22.93
## 2018-12-24 74.67 24.16 25.25 22.72
## 2018-12-25 73.57 23.90 24.90 22.51
## 2018-12-26 73.87 23.83 25.16 22.13
## 2018-12-27 74.81 23.96 25.30 22.95
## 2018-12-28 75.21 23.92 25.24 23.16
str(etf4.xts)
## An 'xts' object on 2009-01-05/2018-12-28 containing:
## Data: num [1:2474, 1:4] 34.3 34.2 34.6 33.2 32.3 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "0050" "0056" "006205" "00646"
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
etf4.xts<-na.locf(etf4.xts)
tail(etf4.xts)
## 0050 0056 006205 00646
## 2018-12-22 74.75 24.15 25.08 22.93
## 2018-12-24 74.67 24.16 25.25 22.72
## 2018-12-25 73.57 23.90 24.90 22.51
## 2018-12-26 73.87 23.83 25.16 22.13
## 2018-12-27 74.81 23.96 25.30 22.95
## 2018-12-28 75.21 23.92 25.24 23.16
etf4.xts.fill<-na.locf(etf4.xts, fromLast = TRUE)
head(etf4.xts.fill)
## 0050 0056 006205 00646
## 2009-01-05 34.30 14.00 20.33 19.54
## 2009-01-06 34.21 14.02 20.33 19.54
## 2009-01-07 34.59 14.28 20.33 19.54
## 2009-01-08 33.21 13.86 20.33 19.54
## 2009-01-09 32.32 13.61 20.33 19.54
## 2009-01-10 31.91 13.55 20.33 19.54
etf4.xts<-na.omit(etf4.xts)
head(etf4.xts)
## 0050 0056 006205 00646
## 2015-12-14 59.35 21.06 30.98 19.54
## 2015-12-15 59.59 21.25 31.66 19.70
## 2015-12-16 60.11 21.50 31.67 19.80
## 2015-12-17 60.78 21.76 32.06 20.05
## 2015-12-18 60.78 21.97 32.23 19.87
## 2015-12-21 60.31 21.99 32.62 19.64
xts是處理時間序列的資料
head/tail 是看前/後幾筆資料 na.locf中locf=last obs. carried forward (用現有的資料補齊沒有的資料)
last obs. carried forward (用未來的資料補前面沒有的資料)
na.omit中omit代表省略的意思
etf4.xts<-na.omit(etf4.xts)-刪除沒有值得部分
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:magrittr':
##
## extract
etf4.xts1<-etf4.xts[complete.cases(etf4.xts),]
head(etf4.xts1)
## 0050 0056 006205 00646
## 2015-12-14 59.35 21.06 30.98 19.54
## 2015-12-15 59.59 21.25 31.66 19.70
## 2015-12-16 60.11 21.50 31.67 19.80
## 2015-12-17 60.78 21.76 32.06 20.05
## 2015-12-18 60.78 21.97 32.23 19.87
## 2015-12-21 60.31 21.99 32.62 19.64
lag_x <- lag(etf4.xts$`0050`, 1)
head(lag_x)
## 0050
## 2015-12-14 NA
## 2015-12-15 59.35
## 2015-12-16 59.59
## 2015-12-17 60.11
## 2015-12-18 60.78
## 2015-12-21 60.78
complete.cases為完整的資料
[lag operator(落後運算元)]
write.csv(etf4.xts, file = "myetf4.csv")
write.zoo(etf4.xts, sep = ',', file = "myetf4.csv.1")
saveRDS(etf4.xts, file = "etf4.xts.rds")
etf4.xts2 <- readRDS("etf4.xts.rds")
head(etf4.xts2)
## 0050 0056 006205 00646
## 2015-12-14 59.35 21.06 30.98 19.54
## 2015-12-15 59.59 21.25 31.66 19.70
## 2015-12-16 60.11 21.50 31.67 19.80
## 2015-12-17 60.78 21.76 32.06 20.05
## 2015-12-18 60.78 21.97 32.23 19.87
## 2015-12-21 60.31 21.99 32.62 19.64
etf4.zoo <- read.zoo("myetf4.csv.1", header = TRUE, index.column =1,
sep = ",", format = "%Y-%m-%d")
head(etf4.zoo)
## X0050 X0056 X006205 X00646
## 2015-12-14 59.35 21.06 30.98 19.54
## 2015-12-15 59.59 21.25 31.66 19.70
## 2015-12-16 60.11 21.50 31.67 19.80
## 2015-12-17 60.78 21.76 32.06 20.05
## 2015-12-18 60.78 21.97 32.23 19.87
## 2015-12-21 60.31 21.99 32.62 19.64
class(etf4.zoo)
## [1] "zoo"
etf4.xts3<-as.xts(etf4.zoo)
head(etf4.xts3)
## X0050 X0056 X006205 X00646
## 2015-12-14 59.35 21.06 30.98 19.54
## 2015-12-15 59.59 21.25 31.66 19.70
## 2015-12-16 60.11 21.50 31.67 19.80
## 2015-12-17 60.78 21.76 32.06 20.05
## 2015-12-18 60.78 21.97 32.23 19.87
## 2015-12-21 60.31 21.99 32.62 19.64
etf4_2016<-etf4.xts['2016']
etf4_2016_01_06 <- etf4.xts["20160101/20160630"]
head(etf4_2016_01_06)
## 0050 0056 006205 00646
## 2016-01-04 59.62 21.51 31.02 19.90
## 2016-01-05 59.30 21.42 29.94 19.78
## 2016-01-06 58.33 21.15 29.94 19.68
## 2016-01-07 57.30 20.91 28.41 19.50
## 2016-01-08 57.33 20.95 28.62 19.32
## 2016-01-11 56.47 20.64 28.47 18.92
lastweek <- last(etf4_2016, "1 week")
last(lastweek, 2)
## 0050 0056 006205 00646
## 2016-12-29 71.35 22.97 26.96 21.55
## 2016-12-30 71.77 23.06 27.00 21.54
first(lastweek, "-2 days")
## 0050 0056 006205 00646
## 2016-12-28 71.37 22.96 27.01 21.62
## 2016-12-29 71.35 22.97 26.96 21.55
## 2016-12-30 71.77 23.06 27.00 21.54
輸出csv檔可使用write.csv函數
write.zoo 可保存時間序列
日期索引消失必須使用write.zoo來保存xts文件
saveRDS 可把大量資料存在較小的地方
lastweek <- last(etf4_2016, “1 week”) - 打印上週的最後2個觀察結果
last(lastweek, 2) - 提取上週前兩天的所有內容