rm()=移除目前的資料

rm=remove

str=structure(顯示物件的結構)

numeric:數值

int=integer:整數

colClasses=欄位屬性

# ---- Read the ETF price data with base R -----------------------------------
# Remove every object from the global environment before starting.
# NOTE(review): rm(list = ls()) only deletes objects; attached packages and
# options() are left as they were, so restarting the R session is the more
# reliable way to get a clean state.
rm(list = ls())

# Attempt 1: no `header` argument, so the header line is read as data row 1
# and the columns receive the default names V1..V5 (see the output below).
etf4 <- read.table("ETF4_2000_2018_d.txt", fileEncoding = "UTF-8-BOM")
head(etf4)
##         V1         V2       V3               V4           V5
## 1 證券代碼       簡稱     日期 未調整收盤價(元) 當日均價(元)
## 2     0050 元大台灣50 20090105            34.20        34.30
## 3     0056 元大高股息 20090105            13.92        14.00
## 4     0050 元大台灣50 20090106            34.18        34.21
## 5     0056 元大高股息 20090106            14.04        14.02
## 6     0050 元大台灣50 20090107            34.63        34.59

# Attempt 2: treat the first line as the header. `stringsAsFactors = TRUE` is
# spelled out because its default became FALSE in R 4.0; without it the name
# column would be read as character rather than the Factor recorded below.
# The security code is still parsed as an integer here, so "0050" becomes 50.
etf4 <- read.table(
  "ETF4_2000_2018_d_ansi.txt",
  header = TRUE,
  stringsAsFactors = TRUE
)
str(etf4)
## 'data.frame':    7493 obs. of  5 variables:
##  $ 證券代碼        : int  50 56 50 56 50 56 50 56 50 56 ...
##  $ 簡稱            : Factor w/ 4 levels "元大S&P500","元大台灣50",..: 2 3 2 3 2 3 2 3 2 3 ...
##  $ 日期            : int  20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
##  $ 未調整收盤價.元.: num  34.2 13.9 34.2 14 34.6 ...
##  $ 當日均價.元.    : num  34.3 14 34.2 14 34.6 ...

# Attempt 3: force the security-code column to character through `colClasses`
# so that the leading zeros are kept.
etf4 <- read.table(
  "ETF4_2000_2018_d_ansi.txt",
  header = TRUE,
  stringsAsFactors = TRUE,
  colClasses = c("證券代碼" = "character")
)
str(etf4)
## 'data.frame':    7493 obs. of  5 variables:
##  $ 證券代碼        : chr  "0050" "0056" "0050" "0056" ...
##  $ 簡稱            : Factor w/ 4 levels "元大S&P500","元大台灣50",..: 2 3 2 3 2 3 2 3 2 3 ...
##  $ 日期            : int  20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
##  $ 未調整收盤價.元.: num  34.2 13.9 34.2 14 34.6 ...
##  $ 當日均價.元.    : num  34.3 14 34.2 14 34.6 ...
head(etf4)
##   證券代碼       簡稱     日期 未調整收盤價.元. 當日均價.元.
## 1     0050 元大台灣50 20090105            34.20        34.30
## 2     0056 元大高股息 20090105            13.92        14.00
## 3     0050 元大台灣50 20090106            34.18        34.21
## 4     0056 元大高股息 20090106            14.04        14.02
## 5     0050 元大台灣50 20090107            34.63        34.59
## 6     0056 元大高股息 20090107            14.28        14.28

# Same data from the CSV file. The encoding is declared explicitly (the other
# reads of this file in the script use big5 as well) so that the header text,
# and therefore the named `colClasses` entry, is decoded the same way on any
# platform; `stringsAsFactors = TRUE` keeps the recorded Factor column.
# The recorded values carry trailing spaces ("0050   ").
# NOTE(review): `strip.white = TRUE` would presumably trim them if the fields
# are unquoted - confirm against the file before relying on exact matches.
etf4 <- read.csv(
  "ETF4_2000_2018_d.csv",
  fileEncoding = "big5",
  stringsAsFactors = TRUE,
  colClasses = c("證券代碼" = "character")
)
str(etf4)
## 'data.frame':    7493 obs. of  5 variables:
##  $ 證券代碼        : chr  "0050   " "0056   " "0050   " "0056   " ...
##  $ 簡稱            : Factor w/ 4 levels "元大S&P500   ",..: 2 3 2 3 2 3 2 3 2 3 ...
##  $ 日期            : int  20090105 20090105 20090106 20090106 20090107 20090107 20090108 20090108 20090109 20090109 ...
##  $ 未調整收盤價.元.: num  34.2 13.9 34.2 14 34.6 ...
##  $ 當日均價.元.    : num  34.3 14 34.2 14 34.6 ...

# Variant with a class given positionally for every column: the first three
# columns become factors, the two price columns stay numeric.
etf4.csv <- read.csv(
  "ETF4_2000_2018_d.csv",
  fileEncoding = "big5",
  colClasses = c("factor", "factor", "factor", "numeric", "numeric")
)
head(etf4.csv)
##   證券代碼          簡稱     日期 未調整收盤價.元. 當日均價.元.
## 1  0050    元大台灣50    20090105            34.20        34.30
## 2  0056    元大高股息    20090105            13.92        14.00
## 3  0050    元大台灣50    20090106            34.18        34.21
## 4  0056    元大高股息    20090106            14.04        14.02
## 5  0050    元大台灣50    20090107            34.63        34.59
## 6  0056    元大高股息    20090107            14.28        14.28
str(etf4.csv)
## 'data.frame':    7493 obs. of  5 variables:
##  $ 證券代碼        : Factor w/ 4 levels "0050   ","0056   ",..: 1 2 1 2 1 2 1 2 1 2 ...
##  $ 簡稱            : Factor w/ 4 levels "元大S&P500   ",..: 2 3 2 3 2 3 2 3 2 3 ...
##  $ 日期            : Factor w/ 2474 levels "20090105","20090106",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ 未調整收盤價.元.: num  34.2 13.9 34.2 14 34.6 ...
##  $ 當日均價.元.    : num  34.3 14 34.2 14 34.6 ...
# ---- Read the CSV with readr -----------------------------------------------
options(repos = "https://cran.rstudio.com")
# Install readr only when it is missing, so that re-running the script does
# not download and reinstall the package (and need network access) each time.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# read_csv() takes the file encoding through `locale`. It guesses the column
# types and reports them in the message below: both text columns stay
# character (leading zeros kept) and the date is parsed as a double.
etf4_csv <- read_csv(
  "ETF4_2000_2018_d.csv",
  locale = locale(encoding = "big5")
)
## Parsed with column specification:
## cols(
##   證券代碼 = col_character(),
##   簡稱 = col_character(),
##   日期 = col_double(),
##   `未調整收盤價(元)` = col_double(),
##   `當日均價(元)` = col_double()
## )
head(etf4_csv)
## # A tibble: 6 x 5
##   證券代碼 簡稱           日期 `未調整收盤價(元)` `當日均價(元)`
##   <chr>    <chr>         <dbl>              <dbl>          <dbl>
## 1 0050     元大台灣50 20090105               34.2           34.3
## 2 0056     元大高股息 20090105               13.9           14  
## 3 0050     元大台灣50 20090106               34.2           34.2
## 4 0056     元大高股息 20090106               14.0           14.0
## 5 0050     元大台灣50 20090107               34.6           34.6
## 6 0056     元大高股息 20090107               14.3           14.3
str(etf4_csv)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 7493 obs. of  5 variables:
##  $ 證券代碼        : chr  "0050" "0056" "0050" "0056" ...
##  $ 簡稱            : chr  "元大台灣50" "元大高股息" "元大台灣50" "元大高股息" ...
##  $ 日期            : num  20090105 20090105 20090106 20090106 20090107 ...
##  $ 未調整收盤價(元): num  34.2 13.9 34.2 14 34.6 ...
##  $ 當日均價(元)    : num  34.3 14 34.2 14 34.6 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   證券代碼 = col_character(),
##   ..   簡稱 = col_character(),
##   ..   日期 = col_double(),
##   ..   `未調整收盤價(元)` = col_double(),
##   ..   `當日均價(元)` = col_double()
##   .. )
# ---- Read the Excel workbook with readxl -----------------------------------
# Install readxl only when it is missing, so that re-running the script does
# not reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# `col_types` fixes the type of each of the five columns in order: code, name
# and date are kept as text, the two price columns are numeric.
etf4_xls <- read_excel(
  "ETF4_2000_2018_d.xls",
  col_types = c("text", "text", "text", "numeric", "numeric")
)
head(etf4_xls)
## # A tibble: 6 x 5
##   證券代碼 簡稱       日期     `未調整收盤價(元)` `當日均價(元)`
##   <chr>    <chr>      <chr>                 <dbl>          <dbl>
## 1 0050     元大台灣50 20090105               34.2           34.3
## 2 0056     元大高股息 20090105               13.9           14  
## 3 0050     元大台灣50 20090106               34.2           34.2
## 4 0056     元大高股息 20090106               14.0           14.0
## 5 0050     元大台灣50 20090107               34.6           34.6
## 6 0056     元大高股息 20090107               14.3           14.3

colnames=name of columns(欄位名稱)

# ---- Keep id / date / price and rename the columns -------------------------
# Approach 1: base-R indexing. Negative indices drop column 2 (name) and
# column 4 (unadjusted close), leaving code, date and average price.
# No row is removed: read_csv() already consumed the header line, so row 1 of
# etf4_csv is a real observation and dropping it would lose data.
etf4.c <- etf4_csv[, c(-2, -4)]
colnames(etf4.c) <- c("id", "date", "price")

# Approach 2: the same result with dplyr verbs chained by the magrittr pipe.
# Packages are installed only when missing, so re-running the script does not
# reinstall them every time.
if (!requireNamespace("magrittr", quietly = TRUE)) {
  install.packages("magrittr")
}
library(magrittr)
options(repos = "https://cran.rstudio.com")
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# select() keeps columns 1, 3 and 5 by position; rename() maps new = old.
etf4.c <- etf4_csv %>%
  select(c(1, 3, 5)) %>%
  rename("id" = "證券代碼", "date" = "日期", "price" = "當日均價(元)")
etf4.c
## # A tibble: 7,493 x 3
##    id        date price
##    <chr>    <dbl> <dbl>
##  1 0050  20090105  34.3
##  2 0056  20090105  14  
##  3 0050  20090106  34.2
##  4 0056  20090106  14.0
##  5 0050  20090107  34.6
##  6 0056  20090107  14.3
##  7 0050  20090108  33.2
##  8 0056  20090108  13.9
##  9 0050  20090109  32.3
## 10 0056  20090109  13.6
## # ... with 7,483 more rows

dim()=顯示資料的列數(個案數)與欄數(變數數)

%Y:四位數的年

reorder:重新整理

as.Date:轉換成日期資料

as.numeric:轉換成數值資料

as.character:轉換成文字資料

xts:專門處理時間數列資料

pipe operator(%>%):把左邊的結果傳給右邊的函數當作第一個引數,省去重複書寫中間變數

# ---- Reshape to wide format and build an xts time series -------------------
# Install reshape2 only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)

# Long -> wide: one row per date, one column per ETF id. `value.var` is named
# explicitly so dcast() does not have to guess which column holds the values
# (it previously guessed `price` and printed a message saying so).
etf4.reorder <- dcast(etf4.c, date ~ id, value.var = "price")
dim(etf4.reorder)
## [1] 2474    5
head(etf4.reorder)
##       date  0050  0056 006205 00646
## 1 20090105 34.30 14.00     NA    NA
## 2 20090106 34.21 14.02     NA    NA
## 3 20090107 34.59 14.28     NA    NA
## 4 20090108 33.21 13.86     NA    NA
## 5 20090109 32.32 13.61     NA    NA
## 6 20090110 31.91 13.55     NA    NA
str(etf4.reorder)
## 'data.frame':    2474 obs. of  5 variables:
##  $ date  : num  20090105 20090106 20090107 20090108 20090109 ...
##  $ 0050  : num  34.3 34.2 34.6 33.2 32.3 ...
##  $ 0056  : num  14 14 14.3 13.9 13.6 ...
##  $ 006205: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ 00646 : num  NA NA NA NA NA NA NA NA NA NA ...

# The date is stored as a number such as 20090105; turn it into text first
# and then parse it as year-month-day to obtain a Date column.
etf4.reorder$date <- as.Date(
  as.character(etf4.reorder$date),
  format = "%Y%m%d"
)
head(etf4.reorder)
##         date  0050  0056 006205 00646
## 1 2009-01-05 34.30 14.00     NA    NA
## 2 2009-01-06 34.21 14.02     NA    NA
## 3 2009-01-07 34.59 14.28     NA    NA
## 4 2009-01-08 33.21 13.86     NA    NA
## 5 2009-01-09 32.32 13.61     NA    NA
## 6 2009-01-10 31.91 13.55     NA    NA
str(etf4.reorder)
## 'data.frame':    2474 obs. of  5 variables:
##  $ date  : Date, format: "2009-01-05" "2009-01-06" ...
##  $ 0050  : num  34.3 34.2 34.6 33.2 32.3 ...
##  $ 0056  : num  14 14 14.3 13.9 13.6 ...
##  $ 006205: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ 00646 : num  NA NA NA NA NA NA NA NA NA NA ...

# Install xts only when it is missing.
if (!requireNamespace("xts", quietly = TRUE)) {
  install.packages("xts")
}
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last

# Build the time series: every column except `date` is the data, and the
# Date column supplies the time index.
etf4.xts <- xts(etf4.reorder[, -1], order.by = etf4.reorder$date)
head(etf4.xts)
##             0050  0056 006205 00646
## 2009-01-05 34.30 14.00     NA    NA
## 2009-01-06 34.21 14.02     NA    NA
## 2009-01-07 34.59 14.28     NA    NA
## 2009-01-08 33.21 13.86     NA    NA
## 2009-01-09 32.32 13.61     NA    NA
## 2009-01-10 31.91 13.55     NA    NA
tail(etf4.xts)
##             0050  0056 006205 00646
## 2018-12-22 74.75 24.15  25.08 22.93
## 2018-12-24 74.67 24.16  25.25 22.72
## 2018-12-25 73.57 23.90  24.90 22.51
## 2018-12-26 73.87 23.83  25.16 22.13
## 2018-12-27 74.81 23.96  25.30 22.95
## 2018-12-28 75.21 23.92  25.24 23.16
str(etf4.xts)
## An 'xts' object on 2009-01-05/2018-12-28 containing:
##   Data: num [1:2474, 1:4] 34.3 34.2 34.6 33.2 32.3 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "0050" "0056" "006205" "00646"
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL

na.locf:以前一筆觀察值補滿 NA(last observation carried forward);fromLast = TRUE 則改用後一筆往前補

# ---- Fill missing prices ---------------------------------------------------
# Forward fill: each NA takes the most recent earlier value in its column.
# NAs at the very start of a column have nothing before them and stay NA.
etf4.xts <- na.locf(object = etf4.xts)
tail(etf4.xts)
##             0050  0056 006205 00646
## 2018-12-22 74.75 24.15  25.08 22.93
## 2018-12-24 74.67 24.16  25.25 22.72
## 2018-12-25 73.57 23.90  24.90 22.51
## 2018-12-26 73.87 23.83  25.16 22.13
## 2018-12-27 74.81 23.96  25.30 22.95
## 2018-12-28 75.21 23.92  25.24 23.16

# Backward fill into a separate object: the remaining leading NAs take the
# first available later value of their column (20.33 and 19.54 below).
etf4.xts.fill <- na.locf(object = etf4.xts, fromLast = TRUE)
head(etf4.xts.fill)
##             0050  0056 006205 00646
## 2009-01-05 34.30 14.00  20.33 19.54
## 2009-01-06 34.21 14.02  20.33 19.54
## 2009-01-07 34.59 14.28  20.33 19.54
## 2009-01-08 33.21 13.86  20.33 19.54
## 2009-01-09 32.32 13.61  20.33 19.54
## 2009-01-10 31.91 13.55  20.33 19.54

na.omit:去除na值

# ---- Drop rows that still contain NA ---------------------------------------
# na.omit() removes every row with at least one NA, so the series now starts
# on the first date for which all four ETFs have a price.
etf4.xts <- na.omit(etf4.xts)
head(etf4.xts)
##             0050  0056 006205 00646
## 2015-12-14 59.35 21.06  30.98 19.54
## 2015-12-15 59.59 21.25  31.66 19.70
## 2015-12-16 60.11 21.50  31.67 19.80
## 2015-12-17 60.78 21.76  32.06 20.05
## 2015-12-18 60.78 21.97  32.23 19.87
## 2015-12-21 60.31 21.99  32.62 19.64

# Install tidyr only when it is missing, so that re-running the script does
# not reinstall the package every time.
# NOTE(review): nothing in this section calls a tidyr function
# (complete.cases() below is not from tidyr); the package is still attached
# in case later code needs it.
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
## The following object is masked from 'package:magrittr':
## 
##     extract

# Equivalent row filter written with a logical index: keep only the rows in
# which no column is NA. The rows with NA were already removed above, so the
# output matches etf4.xts.
etf4.xts1 <- etf4.xts[complete.cases(etf4.xts), ]
head(etf4.xts1)
##             0050  0056 006205 00646
## 2015-12-14 59.35 21.06  30.98 19.54
## 2015-12-15 59.59 21.25  31.66 19.70
## 2015-12-16 60.11 21.50  31.67 19.80
## 2015-12-17 60.78 21.76  32.06 20.05
## 2015-12-18 60.78 21.97  32.23 19.87
## 2015-12-21 60.31 21.99  32.62 19.64

saveRDS:把 R 物件(含大型資料)儲存成 .rds 檔;readRDS:把 .rds 檔讀回成 R 物件

# ---- Save, reload and subset the time series -------------------------------
# Convert to a data frame before write.csv(): the xts matrix has no row names
# (its dimnames row entry is NULL), so writing it directly would emit row
# numbers 1..n instead of the dates. The data frame carries the date index as
# its row names, so the dates end up in the first CSV column.
write.csv(as.data.frame(etf4.xts), file = "myetf4.csv")
# write.zoo() writes the index as the first column by itself.
write.zoo(etf4.xts, sep = ",", file = "myetf4.csv.1")

# RDS round trip: the object comes back as the same xts series.
saveRDS(etf4.xts, file = "etf4.xts.rds")
etf4.xts2 <- readRDS("etf4.xts.rds")
head(etf4.xts2)
##             0050  0056 006205 00646
## 2015-12-14 59.35 21.06  30.98 19.54
## 2015-12-15 59.59 21.25  31.66 19.70
## 2015-12-16 60.11 21.50  31.67 19.80
## 2015-12-17 60.78 21.76  32.06 20.05
## 2015-12-18 60.78 21.97  32.23 19.87
## 2015-12-21 60.31 21.99  32.62 19.64

# Read the write.zoo() file back: column 1 is the index, parsed as a Date.
# The column names come back with an "X" prefix (X0050, ...).
# NOTE(review): presumably the table reader makes the numeric-looking names
# syntactic; passing check.names = FALSE should keep "0050" - confirm.
etf4.zoo <- read.zoo(
  "myetf4.csv.1",
  header = TRUE,
  index.column = 1,
  sep = ",",
  format = "%Y-%m-%d"
)
head(etf4.zoo)
##            X0050 X0056 X006205 X00646
## 2015-12-14 59.35 21.06   30.98  19.54
## 2015-12-15 59.59 21.25   31.66  19.70
## 2015-12-16 60.11 21.50   31.67  19.80
## 2015-12-17 60.78 21.76   32.06  20.05
## 2015-12-18 60.78 21.97   32.23  19.87
## 2015-12-21 60.31 21.99   32.62  19.64
class(etf4.zoo)
## [1] "zoo"
# Convert the zoo object back to xts.
etf4.xts3 <- as.xts(etf4.zoo)
head(etf4.xts3)
##            X0050 X0056 X006205 X00646
## 2015-12-14 59.35 21.06   30.98  19.54
## 2015-12-15 59.59 21.25   31.66  19.70
## 2015-12-16 60.11 21.50   31.67  19.80
## 2015-12-17 60.78 21.76   32.06  20.05
## 2015-12-18 60.78 21.97   32.23  19.87
## 2015-12-21 60.31 21.99   32.62  19.64

# Date-string subsetting: a bare year selects that whole year, and
# "from/to" selects a closed date range.
etf4_2016 <- etf4.xts["2016"]
etf4_2016_01_06 <- etf4.xts["20160101/20160630"]
head(etf4_2016_01_06)
##             0050  0056 006205 00646
## 2016-01-04 59.62 21.51  31.02 19.90
## 2016-01-05 59.30 21.42  29.94 19.78
## 2016-01-06 58.33 21.15  29.94 19.68
## 2016-01-07 57.30 20.91  28.41 19.50
## 2016-01-08 57.33 20.95  28.62 19.32
## 2016-01-11 56.47 20.64  28.47 18.92

# Last calendar week of 2016, then the final two rows of that week.
lastweek <- last(etf4_2016, "1 week")
last(lastweek, 2)
##             0050  0056 006205 00646
## 2016-12-29 71.35 22.97  26.96 21.55
## 2016-12-30 71.77 23.06  27.00 21.54
# A negative period means "everything except": drop the first two days of
# the week and return the rest.
first(lastweek, "-2 days")
##             0050  0056 006205 00646
## 2016-12-28 71.37 22.96  27.01 21.62
## 2016-12-29 71.35 22.97  26.96 21.55
## 2016-12-30 71.77 23.06  27.00 21.54