## Warning: package 'knitr' was built under R version 3.5.3
d <- autoplot(ibmclose)
e <- autoplot(diff(ibmclose))
plot_grid(d,e)
a <- ggAcf(ibmclose)
b <- ggPacf(ibmclose)
c <- ggAcf(diff(ibmclose))
d <- ggPacf(diff(ibmclose))
plot_grid(a,b,c,d)
# automate stationary search
ndiffs(ibmclose)
## [1] 1
## ljung test
Box.test(ibmclose, type = "Ljung")
##
## Box-Ljung test
##
## data: ibmclose
## X-squared = 367.11, df = 1, p-value < 2.2e-16
Box.test(diff(ibmclose), type = "Ljung")
##
## Box-Ljung test
##
## data: diff(ibmclose)
## X-squared = 2.717, df = 1, p-value = 0.09929
lambda <- BoxCox.lambda(usnetelec)
us_net_log_trans <- BoxCox(usnetelec,lambda)
a <- autoplot(usnetelec)
b <- autoplot(us_net_log_trans)
plot_grid(a,b)
## automate search
ndiffs(usnetelec)
## [1] 1
ndiffs(us_net_log_trans)
## [1] 2
a <- ggAcf(usnetelec,lag=55)
b <- ggPacf(usnetelec, lag=55)
c <- ggAcf(diff(usnetelec),lag=55)
d <- ggPacf(diff(usnetelec),lag=55)
e <- ggAcf(diff(diff(us_net_log_trans)),lag=55)
f <- ggPacf(diff(diff(us_net_log_trans)),lag=55)
plot_grid(a,b,c,d,e,f, ncol=2)
data_1_differencing <- autoplot(diff(usnetelec))
log_trans_2differencing <- autoplot(diff(diff(us_net_log_trans)))
plot_grid(data_1_differencing,log_trans_2differencing,labels=c('data_1_differencing','log_trans_2differencing'))
lambda <- BoxCox.lambda(usgdp)
usgdp_log_trans <- BoxCox(usgdp,lambda)
#usgdp
a <- autoplot(usgdp)
b <- autoplot(usgdp_log_trans)
plot_grid(a,b)
## automate search
ndiffs(usgdp)
## [1] 2
ndiffs(usgdp_log_trans)
## [1] 1
## regular data
a <- ggAcf(usgdp)
b <- ggPacf(usgdp)
c <- ggAcf(diff(usgdp))
d <- ggPacf(diff(usgdp))
e <- ggAcf(diff(diff(usgdp)))
f <- ggPacf(diff(diff(usgdp)))
plot_grid(a,b,c,d,e,f,ncol=2)
## log data
a <- ggAcf(usgdp_log_trans)
b <- ggPacf(usgdp_log_trans)
c <- ggAcf(diff(usgdp_log_trans))
d <- ggPacf(diff(usgdp_log_trans))
plot_grid(a,b,c,d)
autoplot(diff(usgdp_log_trans))
autoplot(diff(diff(usgdp)))
plot_grid(autoplot(diff(usgdp_log_trans)),autoplot(diff(diff(usgdp))))
lambda <- BoxCox.lambda(mcopper)
mcopper_log_trans <- BoxCox(mcopper,lambda)
a <- autoplot(mcopper)
b <- autoplot(mcopper_log_trans)
plot_grid(a,b)
## automate search
ndiffs(mcopper)
## [1] 1
ndiffs(mcopper_log_trans)
## [1] 1
## regular data
a <- ggAcf(mcopper)
b <- ggPacf(mcopper)
c <- ggAcf(diff(mcopper))
d <- ggPacf(diff(mcopper))
#e <- ggAcf(diff(diff(usgdp)))
#f <- ggPacf(diff(diff(usgdp)))
plot_grid(a,b,c,d)
## log data
a <- ggAcf(mcopper_log_trans)
b <- ggPacf(mcopper_log_trans)
c <- ggAcf(diff(mcopper_log_trans))
d <- ggPacf(diff(mcopper_log_trans))
plot_grid(a,b,c,d)
autoplot(diff(mcopper_log_trans))
autoplot(diff(diff(mcopper)))
lambda <- BoxCox.lambda(enplanements)
enplanements_log_trans <- BoxCox(enplanements,lambda)
a <- autoplot(enplanements)
b <- autoplot(enplanements_log_trans)
plot_grid(a,b)
## automate search
ndiffs(enplanements)
## [1] 1
ndiffs(enplanements_log_trans)
## [1] 1
ndiffs(diff (log(enplanements_log_trans),12) )
## [1] 1
## regular data
a <- ggAcf(enplanements)
b <- ggPacf(enplanements)
c <- ggAcf(diff(enplanements))
d <- ggPacf(diff(enplanements))
#e <- ggAcf(diff(diff(usgdp)))
#f <- ggPacf(diff(diff(usgdp)))
plot_grid(a,b,c,d)
## log data
a <- ggAcf(enplanements_log_trans)
b <- ggPacf(enplanements_log_trans)
c <- ggAcf(diff(enplanements_log_trans))
d <- ggPacf(diff(enplanements_log_trans))
plot_grid(a,b,c,d)
a <- autoplot(diff(enplanements))
b <- autoplot(diff(enplanements_log_trans))
c <- autoplot(diff(diff(log(enplanements_log_trans),12)))
plot_grid(a,b,c)
lambda <- BoxCox.lambda(visitors)
visitors_log_trans <- BoxCox(visitors,lambda)
a <- autoplot(visitors)
b <- autoplot(visitors_log_trans)
plot_grid(a,b)
## automate search
ndiffs(visitors)
## [1] 1
ndiffs(visitors_log_trans)
## [1] 1
ndiffs(diff (log(visitors_log_trans),12) )
## [1] 1
## regular data
a <- ggAcf(visitors)
b <- ggPacf(visitors)
c <- ggAcf(diff(visitors))
d <- ggPacf(diff(visitors))
#e <- ggAcf(diff(diff(usgdp)))
#f <- ggPacf(diff(diff(usgdp)))
plot_grid(a,b,c,d)
## log data
a <- ggAcf(visitors_log_trans)
b <- ggPacf(visitors_log_trans)
c <- ggAcf(diff(visitors_log_trans))
d <- ggPacf(diff(visitors_log_trans))
e <- ggAcf(diff(diff(visitors_log_trans)))
f <- ggPacf(diff(diff(visitors_log_trans)))
plot_grid(a,b,c,d,e,f,ncol=2)
##
a <- ggAcf(diff(log(visitors),12))
b <- ggPacf(diff(log(visitors),12))
c <- ggAcf(diff(diff(log(visitors),12)))
d <- ggPacf(diff(diff(log(visitors),12)))
plot_grid(a,b,c,d)
a <- autoplot(diff(enplanements))
b <- autoplot(diff(visitors_log_trans))
c <- autoplot(diff(diff(log(visitors_log_trans),12)))
plot_grid(a,b,c)
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,15],
frequency=12, start=c(1982,4))
#myts
#autoplot(myts)
lambda <- BoxCox.lambda(myts)
#autoplot(BoxCox(myts,lambda))
plot_grid(autoplot(BoxCox(myts,lambda)),autoplot(myts),labels = c('Lambda trans', 'original') )
lambda <- BoxCox.lambda(myts)
myts_log_trans <- BoxCox(myts,lambda)
## automate search
ndiffs(myts)
## [1] 1
ndiffs(myts_log_trans)
## [1] 1
ndiffs(diff (log(myts_log_trans),12) )
## [1] 1
## regular data
a <- ggAcf(myts)
b <- ggPacf(myts)
c <- ggAcf(diff(myts))
d <- ggPacf(diff(myts))
#e <- ggAcf(diff(diff(usgdp)))
#f <- ggPacf(diff(diff(usgdp)))
plot_grid(a,b,c,d)
## log data
a <- ggAcf(myts_log_trans)
b <- ggPacf(myts_log_trans)
c <- ggAcf(diff(myts_log_trans))
d <- ggPacf(diff(myts_log_trans))
e <- ggAcf(diff(diff(myts_log_trans)))
f <- ggPacf(diff(diff(myts_log_trans)))
plot_grid(a,b,c,d,e,f,ncol=2)
##
a <- ggAcf(diff(log(myts),12))
b <- ggPacf(diff(log(myts),12))
c <- ggAcf(diff(diff(log(myts),12)))
d <- ggPacf(diff(diff(log(myts),12)))
plot_grid(a,b,c,d)
a <- autoplot(diff(myts))
aa <- autoplot(diff(diff(myts)))
b <- autoplot(diff(myts_log_trans))
c <- autoplot(diff(diff(log(myts),12)))
plot_grid(a,aa,b,c)
set.seed(2)
y <- ts(numeric(100))
e <- rnorm(100)
c <- sample(-10:10,100,replace=TRUE)
for(i in 2:100){
y[i] <- .6*y[i-1] + e[i]
}
autoplot(y)
set.seed(2)
y4 <- ts(numeric(100))
for(i in 2:100)
y4[i] <- 0*y4[i-1] + e[i]
y3 <- ts(numeric(100))
for(i in 2:100)
y3[i] <- y3[i-1] + e[i]
y2 <- ts(numeric(100))
for(i in 2:100){
y2[i] <- -.5*y2[i-1] + e[i]
}
y5 <- ts(numeric(100))
for(i in 2:100){
y5[i] <- c[i]+1*y5[i-1] + e[i]
}
plot_grid(autoplot(y),autoplot(y2),autoplot(y3),autoplot(y4),autoplot(y5),labels=c("original", 'oscillating',"random walk","white Noise","randwom walk with drift"))
\[ x(t) = \beta * E(t-1) + E(t)\]
set.seed(2)
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*e[i-1] + e[i]
autoplot(y)
set.seed(2)
y4 <- ts(numeric(100))
for(i in 2:100)
y4[i] <- 0*e[i-1] + e[i]
y3 <- ts(numeric(100))
for(i in 2:100)
y3[i] <- e[i-1] + e[i]
y2 <- ts(numeric(100))
for(i in 2:100){
y2[i] <- -.5**e[i-1] + e[i]
}
y5 <- ts(numeric(100))
for(i in 2:100){
y5[i] <- c[i]+1*e[i-1] + e[i]
}
plot_grid(autoplot(y),autoplot(y2),autoplot(y3),autoplot(y4),autoplot(y5))
set.seed(1)
arma.sim<-arima.sim(model=list(ar=c(.6),ma=c(.6) ),n=100)
autoplot(arma.sim)
## check to see coefficents of model add up
arima(arma.sim,c(1,0,1),incl=FALSE)
##
## Call:
## arima(x = arma.sim, order = c(1, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1
## 0.6162 0.5782
## s.e. 0.0892 0.0962
##
## sigma^2 estimated as 0.8027: log likelihood = -131.65, aic = 269.3
#arma.sim
#arima.sim(model=list(ar=c(-.8,.3)),n=100)
eps=rnorm(100)
xx=ts(numeric(100))
a=-.8
b=.3
xx[2]=a*xx[1]+eps[2]
for (t in 3:100)
xx[t]=a* xx[t-1]+ b*xx[t-2]
#xx
eps=rnorm(100)
y=ts(numeric(100))
a=-.8
b=.3
y[2]=a*y[1]+eps[2]
for (t in 3:100) {
y[t]=a* y[t-1]+ b*y[t-2]+eps[t]
# print(eps[t])
}
y
## Time Series:
## Start = 1
## End = 100
## Frequency = 1
## [1] 0.0000000 1.5197450 -1.5245366 0.4222630 -0.1529301
## [6] 0.2043138 -1.9425485 1.6174648 -2.5070367 2.1499003
## [11] -3.6286036 5.3509949 -5.7005090 4.5601922 -5.1611131
## [16] 5.7601238 -7.1422596 4.5529242 -6.4254989 7.0767840
## [21] -7.6488002 8.1438966 -8.2489366 7.8558596 -7.6625916
## [26] 8.4814872 -8.3766566 10.2798791 -10.5134199 10.6159920
## [31] -10.4838550 9.5717167 -11.3473206 11.6937008 -12.9252778
## [36] 14.8687964 -15.6363986 17.3769254 -18.6621147 19.8951050
## [41] -20.8191676 23.7700939 -27.6649216 29.8357051 -31.7933161
## [46] 33.9600967 -35.7550594 38.4028393 -41.7331200 45.7647575
## [51] -47.4121147 51.9291739 -56.1891576 59.3409650 -64.6605522
## [56] 68.5909019 -74.5298198 80.5955056 -87.6872075 96.9775846
## [61] -103.7322182 113.2092572 -123.9761952 133.8847345 -145.6168913
## [66] 157.5787371 -169.3499269 182.3460341 -195.3575467 210.2886159
## [71] -227.4187710 244.0205294 -264.1102334 285.4395305 -307.1509923
## [76] 332.3578122 -358.4216661 386.8210469 -416.7391724 448.0113947
## [81] -481.6524382 519.8598166 -559.6179857 604.6074702 -651.6219376
## [86] 702.3739757 -756.4920882 814.8585651 -876.8631411 945.5644503
## [91] -1017.8563573 1099.4666336 -1184.8472484 1278.2850097 -1379.1067308
## [96] 1487.0938940 -1602.3635220 1728.1180643 -1863.6576449 2008.7057534
a <- arima(y,c(2,0,0),incl=FALSE,optim.control = list(maxit = 1000),method="CSS")
## Warning in arima(y, c(2, 0, 0), incl = FALSE, optim.control = list(maxit =
## 1000), : possible convergence problem: optim gave code = 1
a$coef
## ar1 ar2
## -0.8312010 0.2662241
autoplot(ts(xx[1:50]))
series <- rnorm(1000)
y.st <- stats::filter(series, filter = c(-.8,.3), method='recursive')
r2.st <- Arima(y.st, c(2, 0, 0), include.mean=FALSE, transform.pars=FALSE,
method="CSS")
#autoplot(ts(r2.st$residuals[1:100]))+
# autolayer(arma.sim)
plot_grid(autoplot(ts(y)), autoplot(arma.sim))
lambda <- BoxCox.lambda(wmurders)
lambda_trans_murdersx <- BoxCox(wmurders,lambda)
auto.arima(wmurders)
## Series: wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.2434 -0.8261
## s.e. 0.1553 0.1143
##
## sigma^2 estimated as 0.04632: log likelihood=6.44
## AIC=-6.88 AICc=-6.39 BIC=-0.97
autoplot(wmurders)
autoplot(lambda_trans_murdersx)
autoplot(diff(wmurders))
autoplot(diff(diff(wmurders)))
ndiffs(wmurders)
## [1] 2
ndiffs(lambda_trans_murdersx)
## [1] 2
a <- ggAcf(wmurders)
b <- ggPacf(wmurders)
c <- ggAcf(diff(diff(wmurders)))
d <- ggPacf(diff(diff(wmurders)))
e <- ggAcf(diff(wmurders))
f <- ggPacf(diff(wmurders))
plot_grid(a,b,c,d,e,f, ncol=2)
library(tseries)
tseries::adf.test(diff(wmurders),alternative = "stationary" )
##
## Augmented Dickey-Fuller Test
##
## data: diff(wmurders)
## Dickey-Fuller = -3.7688, Lag order = 3, p-value = 0.02726
## alternative hypothesis: stationary
## ar term added no differencing
first_mod <- Arima(wmurders,order=c(1,0,0) )
ggtsdisplay(first_mod$residuals)
checkresiduals(first_mod)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 12.887, df = 8, p-value = 0.1158
##
## Model df: 2. Total lags used: 10
## 1 differencing and 2 ar term added
second_mod <- Arima(wmurders,order=c(2,1,0) )
checkresiduals(second_mod)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)
## Q* = 9.9569, df = 8, p-value = 0.2681
##
## Model df: 2. Total lags used: 10
ggtsdisplay(second_mod$residuals)
summary(second_mod)
## Series: wmurders
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.0572 0.2967
## s.e. 0.1277 0.1275
##
## sigma^2 estimated as 0.04265: log likelihood=9.48
## AIC=-12.96 AICc=-12.48 BIC=-6.99
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001232357 0.2008046 0.1544929 -0.06022595 4.436949 0.950059
## ACF1
## Training set 0.005925284
## 1 differencing and 2ma term added
third_mod <- Arima(wmurders,order=c(0,1,2) )
checkresiduals(third_mod)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)
## Q* = 9.7748, df = 8, p-value = 0.2812
##
## Model df: 2. Total lags used: 10
summary(third_mod)
## Series: wmurders
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -0.0660 0.3712
## s.e. 0.1263 0.1640
##
## sigma^2 estimated as 0.0422: log likelihood=9.71
## AIC=-13.43 AICc=-12.95 BIC=-7.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0007242355 0.1997392 0.1543531 -0.08224024 4.434684
## MASE ACF1
## Training set 0.9491994 0.005880608
second_mod
## Series: wmurders
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.0572 0.2967
## s.e. 0.1277 0.1275
##
## sigma^2 estimated as 0.04265: log likelihood=9.48
## AIC=-12.96 AICc=-12.48 BIC=-6.99
add_constant <- Arima(wmurders, order=c(2,1,0), include.drift=TRUE)
checkresiduals(add_constant)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0) with drift
## Q* = 9.9554, df = 7, p-value = 0.1911
##
## Model df: 3. Total lags used: 10
summary(add_constant)
## Series: wmurders
## ARIMA(2,1,0) with drift
##
## Coefficients:
## ar1 ar2 drift
## -0.0573 0.2965 0.0012
## s.e. 0.1278 0.1276 0.0358
##
## sigma^2 estimated as 0.04348: log likelihood=9.48
## AIC=-10.96 AICc=-10.14 BIC=-3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0003107545 0.2008027 0.1544222 -0.08883056 4.435251
## MASE ACF1
## Training set 0.9496239 0.006043919
SO we have an AR 2 term with 1 level of differencing and a constant. Backwards form
\[ (1-\phi_1 B -\phi_2B^2)(1-B)y_t = c+e_t \]
prediction <- forecast::forecast(object = add_constant,h=3 )
#add_constant$coef[2]
#add_constant$x[53]
by_hand <- add_constant$x[55]*(1+add_constant$coef[1])-add_constant$x[54]*(add_constant$coef[1]-add_constant$coef[2])-add_constant$x[53]*add_constant$coef[2]+add_constant$coef[3]
round(by_hand,2)==round(prediction$mean[1],2)
## ar1
## TRUE
autoplot(prediction)
auto_model <- auto.arima( wmurders, approximation = FALSE)
checkresiduals(auto_model)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 12.419, df = 8, p-value = 0.1335
##
## Model df: 2. Total lags used: 10
summary(auto_model)
## Series: wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.2434 -0.8261
## s.e. 0.1553 0.1143
##
## sigma^2 estimated as 0.04632: log likelihood=6.44
## AIC=-6.88 AICc=-6.39 BIC=-0.97
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01065956 0.2072523 0.1528734 -0.2149476 4.335214 0.9400996
## ACF1
## Training set 0.02176343
farima <- function(x, h) {
forecast(forecast::auto.arima(x), h=h)
}
e2 <- forecast::tsCV(wmurders, farima)
far2 <- function(x, h){forecast(Arima(x, order=c(2,1,0), include.drift=TRUE), h=h)}
e3 <- forecast::tsCV(wmurders, far2)
auto_arima_error <- mean(e2^2, na.rm=TRUE)
my_arima_error <- mean(e3^2, na.rm=TRUE)
c(auto_arima_error,my_arima_error)
## [1] 0.05713716 0.05355224