suppressMessages(suppressWarnings(library(fpp2)))
suppressMessages(suppressWarnings(library(readxl)))
suppressMessages(suppressWarnings(library(seasonal)))
suppressMessages(suppressWarnings(library(rdatamarket)))
suppressMessages(suppressWarnings(library(tseries)))
plot(ibmclose)
acf(ibmclose)
pacf(ibmclose)
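The slowly decaying ACF and the single large spike at lag 1 of the PACF are the classic signature of a non-stationary series. A minimal sketch (an assumed follow-up check, not part of the original output) confirming this numerically:
ndiffs(ibmclose)          # number of first differences suggested
kpss.test(diff(ibmclose)) # the differenced series should pass the KPSS test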
transform.plot <- function(x){
  # compute the Box-Cox lambda, display the transformed series, report lambda
  lam <- BoxCox.lambda(x)
  transformed <- BoxCox(x, lambda = lam)
  tsdisplay(transformed)
  return(paste0("Box-Cox Lambda = ", round(lam, 4)))
}
data("usnetelec")
transform.plot(usnetelec)
## [1] "Box-Cox Lambda = 0.5168"
data("usgdp")
transform.plot(usgdp)
## [1] "Box-Cox Lambda = 0.3664"
data("mcopper")
transform.plot(mcopper)
## [1] "Box-Cox Lambda = 0.1919"
data("enplanements")
transform.plot(enplanements)
## [1] "Box-Cox Lambda = -0.2269"
data("visitors")
transform.plot(visitors)
## [1] "Box-Cox Lambda = 0.2775"
retaildata <- readxl::read_excel("C:/Users/rites/Documents/GitHub/Data624_Assignment1/retail.xlsx", skip=1)
myts <- ts(retaildata[, "A3349873A"], frequency = 12, start = c(1982, 4))
autoplot(myts) + xlab("Time") + ylab("Sales")
The data show an increasing trend and strong seasonality. A Box-Cox transformation should be applied first to stabilise the variance; after that, one seasonal difference and one first difference, as confirmed by nsdiffs() and ndiffs() below, make the series stationary (see the sketch after the output).
ndiffs(myts)
## [1] 1
nsdiffs(myts)
## [1] 1
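Putting the steps together, a sketch of the full preparation (lambda is computed on the fly; the pipeline itself is an added illustration, not original output):
lam <- BoxCox.lambda(myts)
myts %>%
  BoxCox(lambda = lam) %>%
  diff(lag = 12) %>%  # one seasonal difference (monthly data)
  diff() %>%          # one first difference
  ggtsdisplay()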
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100){
  y[i] <- 0.6*y[i-1] + e[i]
}
ar1generator <- function(phi1){
  # generate 100 observations from an AR(1) model with parameter phi1;
  # the errors e are white noise with variance sigma^2 = 1
  y <- ts(numeric(100))
  e <- rnorm(100)
  for(i in 2:100){
    y[i] <- phi1*y[i-1] + e[i]
  }
  return(y)
}
# produce plots for different phi1 values
autoplot(ar1generator(0.3), series = "0.3") +
  geom_line(size = 1, colour = "red") +
  autolayer(y, series = "0.6", size = 1) +
  autolayer(ar1generator(0.9), size = 1, series = "0.9") +
  ylab("AR(1) models") +
  guides(colour = guide_legend(title = "Phi1"))
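As phi1 grows, the series wanders further from its mean and the autocorrelations decay more slowly. A quick sketch (an added illustration, using the simulator above) of the contrast:
ggAcf(ar1generator(0.3)) + ggtitle("AR(1), phi1 = 0.3")
ggAcf(ar1generator(0.9)) + ggtitle("AR(1), phi1 = 0.9")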
ma1generator <- function(theta1){
  # generate 100 observations from an MA(1) model with parameter theta1;
  # the errors e are white noise with variance sigma^2 = 1
  e <- rnorm(100)
  y <- ts(numeric(100))
  y[1] <- e[1]  # the first observation has no lagged error
  for(i in 2:100){
    y[i] <- theta1*e[i-1] + e[i]
  }
  return(y)
}
# produce plots for different theta1 values
autoplot(ma1generator(0.3), series = "0.3") +
  geom_line(size = 1, colour = "red") +
  autolayer(ma1generator(0.6), series = "0.6", size = 1) +
  autolayer(ma1generator(0.9), size = 1, series = "0.9") +
  ylab("MA(1) models") +
  guides(colour = guide_legend(title = "Theta1"))
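Unlike the AR(1) case, the ACF of an MA(1) process should cut off after lag 1. A sketch (an added check, not in the original) using the simulator above:
ggAcf(ma1generator(0.6)) + ggtitle("MA(1), theta1 = 0.6")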
y_arima.1.0.1 <- ts(numeric(50))
e <- rnorm(50)
for(i in 2:50){
  y_arima.1.0.1[i] <- 0.6*y_arima.1.0.1[i-1] + 0.6*e[i-1] + e[i]
}
y_arima.2.0.0 <- ts(numeric(50))
e <- rnorm(50)
for(i in 3:50){
  y_arima.2.0.0[i] <- -0.8*y_arima.2.0.0[i-1] + 0.3*y_arima.2.0.0[i-2] + e[i]
}
autoplot(y_arima.1.0.1, series = "ARMA(1, 1)") + autolayer(y_arima.2.0.0, series = "AR(2)") + ylab("y") + guides(colour = guide_legend(title = "Models"))
# plot the ARMA(1,1) series alone; the diverging AR(2) series dominates the combined scale
autoplot(y_arima.1.0.1)
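The AR(2) series oscillates with growing amplitude because phi1 = -0.8, phi2 = 0.3 violates the stationarity condition phi2 - phi1 < 1. A sketch (an added check) of the roots of the characteristic polynomial 1 - phi1*z - phi2*z^2; a root with modulus below 1 confirms non-stationarity:
roots <- polyroot(c(1, 0.8, -0.3))  # coefficients of 1 + 0.8*z - 0.3*z^2
Mod(roots)                          # one modulus is below 1, so the process explodes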
autoplot(wmurders)
autoplot(diff(wmurders))
ndiffs(wmurders)
## [1] 2
autoplot(diff(wmurders, differences = 2))
kpss.test(diff(wmurders, differences = 2))
## Warning in kpss.test(diff(wmurders, differences = 2)): p-value greater than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(wmurders, differences = 2)
## KPSS Level = 0.045793, Truncation lag parameter = 3, p-value = 0.1
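The KPSS p-value is above 0.05, so the null hypothesis of stationarity is not rejected: twice-differencing is sufficient.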
diff(wmurders, differences = 2) %>% ggtsdisplay()
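The sample ACF and PACF of the twice-differenced series point to a low-order MA model, making ARIMA(0,2,2) a natural candidate. A sketch (candidate set assumed, not from the original) comparing a few alternatives by AICc:
Arima(wmurders, order = c(0, 2, 1))$aicc
Arima(wmurders, order = c(0, 2, 2))$aicc
Arima(wmurders, order = c(1, 2, 1))$aicc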
wmurders_arima.0.2.2 <- Arima(wmurders, order = c(0, 2, 2))
checkresiduals(wmurders_arima.0.2.2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)
## Q* = 11.764, df = 8, p-value = 0.1621
##
## Model df: 2. Total lags used: 10
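With a Ljung-Box p-value of 0.1621, there is no evidence of remaining autocorrelation; the residuals are consistent with white noise, so the model is adequate for forecasting.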
fc_wmurders_arima.0.2.2 <- forecast(wmurders_arima.0.2.2, h = 3)
# point forecasts from forecast()
fc_wmurders_arima.0.2.2$mean
## Time Series:
## Start = 2005
## End = 2007
## Frequency = 1
## [1] 2.480525 2.374890 2.269256
# inspect the fitted model; its coefficients are needed for a manual calculation
fc_wmurders_arima.0.2.2$model
## Series: wmurders
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -1.0181 0.1470
## s.e. 0.1220 0.1156
##
## sigma^2 estimated as 0.04702: log likelihood=6.03
## AIC=-6.06 AICc=-5.57 BIC=-0.15
# The fitted model is (1 - B)^2 * y[t] = (1 - 1.0181*B + 0.1470*B^2) * e[t],
# i.e. y[t] = 2*y[t-1] - y[t-2] + e[t] - 1.0181*e[t-1] + 0.1470*e[t-2].
# Point forecasts follow by setting future errors to zero.
years <- length(wmurders)
e <- fc_wmurders_arima.0.2.2$residuals
fc1 <- 2*wmurders[years] - wmurders[years - 1] - 1.0181*e[years] + 0.1470*e[years - 1]
fc2 <- 2*fc1 - wmurders[years] + 0.1470*e[years]
fc3 <- 2*fc2 - fc1
# forecasts by manual calculation
c(fc1, fc2, fc3)
## [1] 2.480523 2.374887 2.269252
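The manual forecasts match forecast() to four decimal places; the small discrepancy comes from using coefficients rounded to four digits rather than their full-precision values.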
autoplot(fc_wmurders_arima.0.2.2)
# default stepwise automatic model selection
fc_wmurders_autoarima <- forecast(auto.arima(wmurders), h = 3)
accuracy(fc_wmurders_arima.0.2.2)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0113461 0.2088162 0.1525773 -0.2403396 4.331729 0.9382785
## ACF1
## Training set -0.05094066
accuracy(fc_wmurders_autoarima)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01065956 0.2072523 0.1528734 -0.2149476 4.335214 0.9400996
## ACF1
## Training set 0.02176343
# full search, without the stepwise shortcut or likelihood approximation
fc_wmurders_autoarima2 <- forecast(
  auto.arima(wmurders, stepwise = FALSE, approximation = FALSE), h = 3)
accuracy(fc_wmurders_autoarima2)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01336585 0.2016929 0.1531053 -0.3332051 4.387024 0.9415259
## ACF1
## Training set -0.03193856
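The accuracy measures above are all in-sample, which tends to flatter the more flexible model. A hedged sketch of an out-of-sample comparison via time-series cross-validation (one-step errors; refits on very short training windows may fail, in which case tsCV() records NA):
fc022 <- function(x, h){forecast(Arima(x, order = c(0, 2, 2)), h = h)}
fc023 <- function(x, h){forecast(Arima(x, order = c(0, 2, 3)), h = h)}
e022 <- tsCV(wmurders, fc022, h = 1)
e023 <- tsCV(wmurders, fc023, h = 1)
sqrt(mean(e022^2, na.rm = TRUE))  # one-step CV RMSE, ARIMA(0,2,2)
sqrt(mean(e023^2, na.rm = TRUE))  # one-step CV RMSE, ARIMA(0,2,3)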
# residual diagnostics for the hand-picked model and the full-search model
checkresiduals(fc_wmurders_arima.0.2.2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)
## Q* = 11.764, df = 8, p-value = 0.1621
##
## Model df: 2. Total lags used: 10
checkresiduals(fc_wmurders_autoarima2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,3)
## Q* = 10.706, df = 7, p-value = 0.152
##
## Model df: 3. Total lags used: 10
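Both models pass the Ljung-Box test, and their training errors are very close; the ARIMA(0,2,3) found by the full search has a slightly lower RMSE, but there is little practical difference between the two.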