Today we are goint to explore Time Series libraries in R. Most of the information comes from the XTS vignette by Jeffrey A. Ryan and Joshua M. Ulrich.
Sys.Date()
## [1] "2014-08-22"
class(Sys.Date())
## [1] "Date"
Sys.time()
## [1] "2014-08-22 18:28:40 EDT"
class(Sys.time())
## [1] "POSIXct" "POSIXt"
# from ?DateTimeClasses
Sys.time() - 3600 # an hour ago
## [1] "2014-08-22 17:28:40 EDT"
as.POSIXlt(Sys.time(), "GMT") # the current time in GMT
## [1] "2014-08-22 22:28:40 GMT"
charDateTime <- "2012-06-30 20:00:00 EDT"
class(charDateTime)
## [1] "character"
## look at *internal* representation of "POSIXlt" :
leapS <- as.POSIXlt(charDateTime)
class(leapS)
## [1] "POSIXlt" "POSIXt"
is.list(leapS)
## [1] TRUE
str(unclass(leapS))
## List of 11
## $ sec : num 0
## $ min : int 0
## $ hour : int 20
## $ mday : int 30
## $ mon : int 5
## $ year : int 112
## $ wday : int 6
## $ yday : int 181
## $ isdst : int 1
## $ zone : chr "EDT"
## $ gmtoff: int NA
leapS <- as.POSIXct(charDateTime)
class(leapS)
## [1] "POSIXct" "POSIXt"
is.list(leapS)
## [1] FALSE
unclass(leapS)
## [1] 1.341e+09
## attr(,"tzone")
## [1] ""
str(unclass(leapS))
## atomic [1:1] 1.34e+09
## - attr(*, "tzone")= chr ""
# from ?as.POSIX*
(z <- Sys.time()) # the current datetime, as class "POSIXct"
## [1] "2014-08-22 18:28:40 EDT"
unclass(z) # a large integer
## [1] 1.409e+09
floor(unclass(z) / (60 * 60 * 24)) # the number of days since 1970-01-01 (UTC)
## [1] 16304
(now <- as.POSIXlt(Sys.time())) # the current datetime, as class "POSIXlt"
## [1] "2014-08-22 18:28:40 EDT"
unlist(unclass(now)) # a list shown as a named vector
## sec min hour
## "40.5824859142303" "28" "18"
## mday mon year
## "22" "7" "114"
## wday yday isdst
## "5" "233" "1"
## zone gmtoff
## "EDT" "-14400"
now$year + 1900 # see ?DateTimeClasses
## [1] 2014
## These may not be correct names on your system
as.POSIXlt(Sys.time(), "America/New_York") # in New York
## [1] "2014-08-22 18:28:40 EDT"
as.POSIXlt(Sys.time(), "EST5EDT") # alternative.
## [1] "2014-08-22 18:28:40 EDT"
as.POSIXlt(Sys.time(), "EST" ) # somewhere in Eastern Canada
## [1] "2014-08-22 17:28:40 EST"
as.POSIXlt(Sys.time(), "HST") # in Hawaii
## [1] "2014-08-22 12:28:40 HST"
as.POSIXlt(Sys.time(), "Australia/Darwin")
## [1] "2014-08-23 07:58:40 CST"
library("xts")
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#
data(sample_matrix)
head(sample_matrix)
## Open High Low Close
## 2007-01-02 50.04 50.12 49.95 50.12
## 2007-01-03 50.23 50.42 50.23 50.40
## 2007-01-04 50.42 50.42 50.26 50.33
## 2007-01-05 50.37 50.37 50.22 50.33
## 2007-01-06 50.24 50.24 50.11 50.18
## 2007-01-07 50.13 50.22 49.99 49.99
class(sample_matrix)
## [1] "matrix"
dim(sample_matrix)
## [1] 180 4
rownames(sample_matrix)[1:10]
## [1] "2007-01-02" "2007-01-03" "2007-01-04" "2007-01-05" "2007-01-06"
## [6] "2007-01-07" "2007-01-08" "2007-01-09" "2007-01-10" "2007-01-11"
# casting to xts
matrix_xts <- as.xts(sample_matrix, dateFormat = 'Date')
class(matrix_xts)
## [1] "xts" "zoo"
str(matrix_xts)
## An 'xts' object on 2007-01-02/2007-06-30 containing:
## Data: num [1:180, 1:4] 50 50.2 50.4 50.4 50.2 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "Open" "High" "Low" "Close"
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
head(matrix_xts)
## Open High Low Close
## 2007-01-02 50.04 50.12 49.95 50.12
## 2007-01-03 50.23 50.42 50.23 50.40
## 2007-01-04 50.42 50.42 50.26 50.33
## 2007-01-05 50.37 50.37 50.22 50.33
## 2007-01-06 50.24 50.24 50.11 50.18
## 2007-01-07 50.13 50.22 49.99 49.99
# creating a new xts
new.xts <- xts(rnorm(10), Sys.Date() + 1:10)
new.xts
## [,1]
## 2014-08-23 0.1239
## 2014-08-24 0.2159
## 2014-08-25 0.3796
## 2014-08-26 -0.5023
## 2014-08-27 -0.3332
## 2014-08-28 -1.0186
## 2014-08-29 -1.0718
## 2014-08-30 0.3035
## 2014-08-31 0.4482
## 2014-09-01 0.0530
str(new.xts)
## An 'xts' object on 2014-08-23/2014-09-01 containing:
## Data: num [1:10, 1] 0.124 0.216 0.38 -0.502 -0.333 ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## NULL
# subsetting xts
matrix_xts['2007-03']
## Open High Low Close
## 2007-03-01 50.82 50.82 50.56 50.57
## 2007-03-02 50.61 50.72 50.51 50.62
## 2007-03-03 50.73 50.73 50.41 50.41
## 2007-03-04 50.39 50.41 50.25 50.33
## 2007-03-05 50.27 50.34 50.27 50.30
## 2007-03-06 50.27 50.32 50.16 50.16
## 2007-03-07 50.14 50.20 49.91 49.91
## 2007-03-08 49.93 50.00 49.85 49.92
## 2007-03-09 49.92 49.92 49.74 49.81
## 2007-03-10 49.79 49.89 49.70 49.89
## 2007-03-11 49.83 49.88 49.76 49.79
## 2007-03-12 49.83 49.90 49.67 49.74
## 2007-03-13 49.70 49.71 49.38 49.38
## 2007-03-14 49.36 49.54 49.31 49.54
## 2007-03-15 49.57 49.62 49.40 49.50
## 2007-03-16 49.45 49.65 49.42 49.60
## 2007-03-17 49.56 49.56 49.34 49.35
## 2007-03-18 49.30 49.68 49.30 49.65
## 2007-03-19 49.63 49.65 49.52 49.55
## 2007-03-20 49.60 49.62 49.42 49.51
## 2007-03-21 49.50 49.54 49.42 49.52
## 2007-03-22 49.42 49.42 49.31 49.40
## 2007-03-23 49.27 49.27 48.93 48.93
## 2007-03-24 48.87 48.87 48.53 48.53
## 2007-03-25 48.51 48.51 48.33 48.34
## 2007-03-26 48.34 48.45 48.29 48.29
## 2007-03-27 48.25 48.42 48.24 48.31
## 2007-03-28 48.33 48.54 48.33 48.54
## 2007-03-29 48.59 48.70 48.57 48.70
## 2007-03-30 48.75 49.00 48.75 48.94
## 2007-03-31 48.96 49.10 48.96 48.97
# from the beginnig through jan 04
matrix_xts['/2007-01-04']
## Open High Low Close
## 2007-01-02 50.04 50.12 49.95 50.12
## 2007-01-03 50.23 50.42 50.23 50.40
## 2007-01-04 50.42 50.42 50.26 50.33
# from the from jun 23 through the end
matrix_xts['2007-06-23/']
## Open High Low Close
## 2007-06-23 47.23 47.25 47.09 47.25
## 2007-06-24 47.24 47.30 47.21 47.23
## 2007-06-25 47.20 47.43 47.13 47.43
## 2007-06-26 47.44 47.62 47.44 47.62
## 2007-06-27 47.62 47.72 47.60 47.63
## 2007-06-28 47.68 47.70 47.57 47.61
## 2007-06-29 47.64 47.78 47.62 47.66
## 2007-06-30 47.67 47.94 47.67 47.77
# first one week of data
first(matrix_xts, '1 week')
## Open High Low Close
## 2007-01-02 50.04 50.12 49.95 50.12
## 2007-01-03 50.23 50.42 50.23 50.40
## 2007-01-04 50.42 50.42 50.26 50.33
## 2007-01-05 50.37 50.37 50.22 50.33
## 2007-01-06 50.24 50.24 50.11 50.18
## 2007-01-07 50.13 50.22 49.99 49.99
# first 3 days of the last week of the data
first(last(matrix_xts,'1 week'),'3 days')
## Open High Low Close
## 2007-06-25 47.20 47.43 47.13 47.43
## 2007-06-26 47.44 47.62 47.44 47.62
## 2007-06-27 47.62 47.72 47.60 47.63
# plotting
axTicksByTime(matrix_xts, ticks.on = 'months')
## Jan 02 2007 Feb 01 2007 Mar 01 2007 Apr 01 2007 May 01 2007 Jun 01 2007
## 1 31 59 90 120 151
## Jun 30 2007
## 180
plot(matrix_xts[, 2], major.ticks = 'months',
minor.ticks = FALSE, main = NULL, col = 3)
periodicity(matrix_xts)
## Daily periodicity from 2007-01-02 to 2007-06-30
endpoints(matrix_xts, on = 'months')
## [1] 0 30 58 89 119 150 180
endpoints(matrix_xts, on = 'weeks')
## [1] 0 6 13 20 27 34 41 48 55 62 69 76 83 90 97 104 111
## [18] 118 125 132 139 146 153 160 167 174 180
to.period(matrix_xts, 'months')
## matrix_xts.Open matrix_xts.High matrix_xts.Low matrix_xts.Close
## 2007-01-31 50.04 50.77 49.76 50.23
## 2007-02-28 50.22 51.32 50.19 50.77
## 2007-03-31 50.82 50.82 48.24 48.97
## 2007-04-30 48.94 50.34 48.81 49.34
## 2007-05-31 49.35 49.69 47.52 47.74
## 2007-06-30 47.74 47.94 47.09 47.77
periodicity(to.period(matrix_xts, 'months'))
## Monthly periodicity from 2007-01-31 to 2007-06-30
apply.monthly(matrix_xts[, 4], FUN = max)
## Close
## 2007-01-31 50.68
## 2007-02-28 51.18
## 2007-03-31 50.62
## 2007-04-30 50.33
## 2007-05-31 49.59
## 2007-06-30 47.77
# see also apply.daily, etc...
# merging XTS objtects
(x <- xts(4:10, Sys.Date() + 4:10))
## [,1]
## 2014-08-26 4
## 2014-08-27 5
## 2014-08-28 6
## 2014-08-29 7
## 2014-08-30 8
## 2014-08-31 9
## 2014-09-01 10
(y <- xts(1:6, Sys.Date() + 1:6))
## [,1]
## 2014-08-23 1
## 2014-08-24 2
## 2014-08-25 3
## 2014-08-26 4
## 2014-08-27 5
## 2014-08-28 6
merge(x, y)
## x y
## 2014-08-23 NA 1
## 2014-08-24 NA 2
## 2014-08-25 NA 3
## 2014-08-26 4 4
## 2014-08-27 5 5
## 2014-08-28 6 6
## 2014-08-29 7 NA
## 2014-08-30 8 NA
## 2014-08-31 9 NA
## 2014-09-01 10 NA
merge(x, y, join = 'inner')
## x y
## 2014-08-26 4 4
## 2014-08-27 5 5
## 2014-08-28 6 6
merge(x, y, join = 'left')
## x y
## 2014-08-26 4 4
## 2014-08-27 5 5
## 2014-08-28 6 6
## 2014-08-29 7 NA
## 2014-08-30 8 NA
## 2014-08-31 9 NA
## 2014-09-01 10 NA
merge(x, y, join = 'right')
## x y
## 2014-08-23 NA 1
## 2014-08-24 NA 2
## 2014-08-25 NA 3
## 2014-08-26 4 4
## 2014-08-27 5 5
## 2014-08-28 6 6
Lubridate pacakge:
http://cran.r-project.org/web/packages/lubridate/vignettes/lubridate.html
Take a look at the quantmod package (library(“quantmod”))
library("quantmod")
## Loading required package: Defaults
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("reshape2")
library("ggplot2")
aapl <- getSymbols("AAPL", auto.assign = FALSE)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbol for more details
ibm <- getSymbols("IBM", auto.assign = FALSE)
aapl <- aapl[, 'AAPL.Adjusted']
ibm <- ibm[, 'IBM.Adjusted']
stocks <- merge(aapl, ibm, join = "inner")
head(stocks)
## AAPL.Adjusted IBM.Adjusted
## 2007-01-03 11.39 84.57
## 2007-01-04 11.64 85.48
## 2007-01-05 11.56 84.70
## 2007-01-08 11.61 85.99
## 2007-01-09 12.58 87.01
## 2007-01-10 13.18 85.98
toLong <- function (x) {
x.df <- as.data.frame(x)
melted <- melt(as.matrix(x.df))
names(melted) <- c("date", "ticker", "price")
melted$date <- as.Date(melted$date)
return(melted)
}
long.df <- toLong(stocks)
head(long.df)
## date ticker price
## 1 2007-01-03 AAPL.Adjusted 11.39
## 2 2007-01-04 AAPL.Adjusted 11.64
## 3 2007-01-05 AAPL.Adjusted 11.56
## 4 2007-01-08 AAPL.Adjusted 11.61
## 5 2007-01-09 AAPL.Adjusted 12.58
## 6 2007-01-10 AAPL.Adjusted 13.18
qplot(date, price, colour = ticker, geom = "line", data = long.df) +
theme_bw()
logRet <- function(x) {
diff(log(x))
}
rets <- apply(stocks, 2, logRet)
head(rets)
## AAPL.Adjusted IBM.Adjusted
## 2007-01-04 0.021712 0.010703
## 2007-01-05 -0.006897 -0.009167
## 2007-01-08 0.004316 0.015115
## 2007-01-09 0.080241 0.011792
## 2007-01-10 0.046592 -0.011908
## 2007-01-11 -0.012214 -0.002445
rets.long <- toLong(rets)
qplot(date, price, colour = ticker, geom = "line", data = rets.long) +
theme_bw()
cum.rets <- apply(rets, 2, cumsum)
cum.long <- toLong(cum.rets)
qplot(date, price, colour = ticker, geom = "line", data = cum.long) +
theme_bw()
Explore the relationship between IBM and AAPL returns
theme_set(theme_bw())
qplot(AAPL.Adjusted, IBM.Adjusted, data = as.data.frame(rets)) +
stat_smooth(method = "lm")
qplot(AAPL.Adjusted, IBM.Adjusted, data = as.data.frame(rets)) +
stat_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
Come up with the estimate for beta for IBM ~ AAPL, and a confidence interval
ret.lm <- lm(rets[, 'IBM.Adjusted'] ~ rets[, 'AAPL.Adjusted'])
summary(ret.lm)
##
## Call:
## lm(formula = rets[, "IBM.Adjusted"] ~ rets[, "AAPL.Adjusted"])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08523 -0.00641 -0.00021 0.00683 0.09011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.65e-05 2.89e-04 0.2 0.84
## rets[, "AAPL.Adjusted"] 3.25e-01 1.30e-02 25.0 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0126 on 1920 degrees of freedom
## Multiple R-squared: 0.245, Adjusted R-squared: 0.245
## F-statistic: 623 on 1 and 1920 DF, p-value: <2e-16
beta <- coef(ret.lm)[2]
error <- summary(ret.lm)$coefficients[2, 2]
beta + 2 * error
## rets[, "AAPL.Adjusted"]
## 0.3507
beta - 2 * error
## rets[, "AAPL.Adjusted"]
## 0.2987
plot(resid(ret.lm) ~ fitted(ret.lm))
qqnorm(resid(ret.lm))
qqline(resid(ret.lm))
acf(resid(ret.lm))
pacf(resid(ret.lm))
library('mvtnorm')
library('reshape2')
# supress warnings
# options(warn = -1)
#### INPUT PARAMETERS
n <- 1000
covar <- 2
var1 <- 10
var2 <- 10
mean1 <- 1
mean2 <- 2
#### OUTPUT PARAMETERS
# x <- Wide output matrix
# x.long <- Long output matrix
sigma <- matrix(c(var1, covar, covar, var2), ncol = 2)
head(sigma)
## [,1] [,2]
## [1,] 10 2
## [2,] 2 10
x <- rmvnorm(n = n, mean = c(mean1, mean2), sigma = sigma)
x <- data.frame(rv1 = rnorm(n), rv2 = rnorm(n), mv1 = x[, 1], mv2 = x[, 2])
head(x)
## rv1 rv2 mv1 mv2
## 1 -2.0637 0.7351 4.5533 8.74324
## 2 -0.6015 0.1468 -1.2789 -5.42132
## 3 0.5860 -0.7108 3.9389 0.08833
## 4 -0.2945 1.1056 -0.8387 5.00804
## 5 -0.8005 -0.8857 -0.2840 -1.93121
## 6 -0.6357 0.6948 1.5263 1.62064
x.long <- melt(x)
## No id variables; using all as measure variables
head(x.long)
## variable value
## 1 rv1 -2.0637
## 2 rv1 -0.6015
## 3 rv1 0.5860
## 4 rv1 -0.2945
## 5 rv1 -0.8005
## 6 rv1 -0.6357
library("plyr")
head(diamonds)
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
summary(diamonds)
## carat cut color clarity
## Min. :0.200 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.400 Good : 4906 E: 9797 VS2 :12258
## Median :0.700 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.798 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.040 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.010 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.0 Min. :43.0 Min. : 326 Min. : 0.00
## 1st Qu.:61.0 1st Qu.:56.0 1st Qu.: 950 1st Qu.: 4.71
## Median :61.8 Median :57.0 Median : 2401 Median : 5.70
## Mean :61.8 Mean :57.5 Mean : 3933 Mean : 5.73
## 3rd Qu.:62.5 3rd Qu.:59.0 3rd Qu.: 5324 3rd Qu.: 6.54
## Max. :79.0 Max. :95.0 Max. :18823 Max. :10.74
##
## y z
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 4.72 1st Qu.: 2.91
## Median : 5.71 Median : 3.53
## Mean : 5.73 Mean : 3.54
## 3rd Qu.: 6.54 3rd Qu.: 4.04
## Max. :58.90 Max. :31.80
##
qplot(carat, price, data = diamonds, colour = color, alpha = I(1/10))
d.price <- ddply(diamonds, .(color), summarise, mean(price))
names(d.price)[2] <- "price"
d.price <- d.price[with(d.price, order(-price)), ]
d.price
## color price
## 7 J 5324
## 6 I 5092
## 5 H 4487
## 4 G 3999
## 3 F 3725
## 1 D 3170
## 2 E 3077
d.price.carat <- ddply(diamonds, .(color), summarise, mean(price / carat))
names(d.price.carat)[2] <- "price.carat"
d.price.carat <- arrange(d.price.carat, desc(price.carat))
d.price.carat
## color price.carat
## 1 G 4163
## 2 F 4135
## 3 H 4008
## 4 I 3996
## 5 D 3953
## 6 J 3826
## 7 E 3805
ggplot(data = d.price.carat,
aes(x = color, y = price.carat)) +
geom_bar(stat = "identity")
spy <- getSymbols("SPY", auto.assign = FALSE)
spy <- spy[, ncol(spy)]
plot(spy)
spy.ret <- logRet(spy)
plot(spy.ret)
acf(spy.ret[-1])
pacf(spy.ret[-1])
# ?arima
library("forecast")
## Loading required package: timeDate
## This is forecast 5.5
##
##
## Attaching package: 'forecast'
##
## The following object is masked from 'package:nlme':
##
## getResponse
spy.arima <- auto.arima(spy.ret, max.p = 20, max.q = 20, stationary = TRUE,
seasonal = FALSE)
summary(spy.arima)
## Series: spy.ret
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## -0.101 -0.089
## s.e. 0.023 0.023
##
## sigma^2 estimated as 0.000198: log likelihood=5467
## AIC=-10929 AICc=-10929 BIC=-10912
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0003096 0.01407 0.009123 NaN Inf 0.6714 0.001532
acf(resid(spy.arima)[-1])
pacf(resid(spy.arima)[-1])
spy.sim <- simulate(spy.arima, 1900)
plot(spy.sim)
acf(resid(spy.arima)[-1]^2)
plot(forecast(spy.arima, h = 100))
# now let auto.arima pick the d
spy.arima <- auto.arima(spy, max.p = 20, max.q = 20, stationary = FALSE,
seasonal = FALSE)
summary(spy.arima)
## Series: spy
## ARIMA(0,2,3)
##
## Coefficients:
## ma1 ma2 ma3
## -1.087 0.038 0.051
## s.e. 0.023 0.034 0.023
##
## sigma^2 estimated as 2.1: log likelihood=-3443
## AIC=6895 AICc=6895 BIC=6917
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02093 1.45 1.025 0.009799 0.9073 1.003 -0.001221
acf(resid(spy.arima)[-1])
pacf(resid(spy.arima)[-1])
plot(forecast(spy.arima, h = 100))