library('TSA')
## Loading required package: leaps
## Loading required package: locfit
## locfit 1.5-9.1 2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
## Loading required package: tseries
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library('tseries')
library('forecast')
##
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
##
## getResponse
TSA1 = read.csv("beer_assignment.csv")
head(TSA1)
## OzBeer
## 1 284.4
## 2 212.8
## 3 226.9
## 4 308.4
## 5 262.0
## 6 227.9
attach(TSA1)
timeseries = ts(TSA1,frequency =12,start = c(2012,1))
timeseries
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 2012 284.4 212.8 226.9 308.4 262.0 227.9 236.1 320.4 271.9 232.8 237.0
## 2013 261.4 226.8 249.9 314.3 286.1 226.5 260.4 311.4 294.7 232.6 257.2
## 2014 279.1 249.8 269.8 345.7 293.8 254.7 277.5 363.4 313.4 272.8 300.1
## 2015 330.8 287.8 305.9 386.1 335.2 288.0 308.3 402.3 352.8 316.1 324.9
## 2016 393.0 318.9 327.0 442.3 383.1 331.6 361.4 445.9 386.6 357.2 373.6
## 2017 409.6 369.8 378.6 487.0 419.2 376.7 392.8 506.1 458.4 387.4 426.9
## Dec
## 2012 313.4
## 2013 339.2
## 2014 369.5
## 2015 404.8
## 2016 466.2
## 2017 525.0
is.ts(timeseries)
## [1] TRUE
plot.ts(timeseries,col="blue",main="Sales data time series with timestamp")
abline(reg=lm(timeseries~ time(timeseries)),col="red")
#This shows there is a clear trend and seasonality in this data set
Annual_beer = aggregate(timeseries)
str(Annual_beer)
## Time-Series [1:6, 1] from 2012 to 2017: 3134 3260 3590 4043 4587 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "OzBeer"
summary(Annual_beer)
## OzBeer
## Min. :3134
## 1st Qu.:3343
## Median :3816
## Mean :3959
## 3rd Qu.:4451
## Max. :5138
#What does the year-on-year increase in annual beer consumption look like?
plot(Annual_beer,col="Blue", main = "Annual beer consumption year on year")
#Evaluate the average beer consumption year on year
Average_beer = aggregate(timeseries, FUN = mean)
Average_beer
## Time Series:
## Start = 2012
## End = 2017
## Frequency = 1
## OzBeer
## [1,] 261.1667
## [2,] 271.7083
## [3,] 299.1333
## [4,] 336.9167
## [5,] 382.2333
## [6,] 428.1250
plot(Average_beer,col="Blue", main = "Average beer consumption year on year")
#Evaluate the variance across months in a cycle and within months over multiple cycles
boxplot(timeseries~cycle(timeseries), main="Variation between months and between years")
The Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test is used for testing a null hypothesis that an observable time series is stationary. Here the null hypothesis is that the time series is stationary.
stationary = read.csv("beer_assignment.csv",1)
attach(stationary)
## The following object is masked from TSA1:
##
## OzBeer
beer = ts(stationary, frequency =12)
plot(beer, col="blue", main = "Before stationary differencing")
kpss.test(beer)
## Warning in kpss.test(beer): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: beer
## KPSS Level = 3.0456, Truncation lag parameter = 1, p-value = 0.01
The p-value is less than 0.05, so we reject the null hypothesis that the data is stationary. This time series is therefore not stationary.
Differencing: differencing a series d times will make an I(d) series stationary. In addition, differencing once will remove any linear trend.
Where seasonal variation is present, one way to remove it is to take a seasonal difference. These are also called “lag-m differences” as we subtract the observation after a lag of m periods.
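A minimal toy sketch (synthetic data, not the beer series) illustrating the two statements above: one ordinary difference removes a linear trend, and one lag-m (seasonal) difference removes a pattern that repeats every m periods.
trend_only = ts(5 + 2 * (1:12))                              # a straight line
diff(trend_only)                                             # constant 2: the linear trend is gone
seasonal_only = ts(rep(c(10, 20, 30, 40), times = 5), frequency = 4)
diff(seasonal_only, lag = 4)                                 # all zeros: the repeating pattern is gone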
R has built-in functions to determine how many times we have to apply trend and seasonal differencing. nsdiffs() estimates the number of seasonal differences required.
nsdiffs(beer)
## [1] 1
#This means we have to do 1 seasonal difference
beer_diff_seas = diff(beer, lag = frequency(beer),differences = 1)
plot(beer_diff_seas, col="blue", main = "After stationary differencing")
#Visually the data does not look stationary; let's check mathematically whether it is stationary
kpss.test(beer_diff_seas)
## Warning in kpss.test(beer_diff_seas): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: beer_diff_seas
## KPSS Level = 2.2781, Truncation lag parameter = 1, p-value = 0.01
#The p-value is 0.01 < 0.05, so reject the null hypothesis.
#The null hypothesis is that the data is stationary, so the data is still not stationary.
#Let's add a fitted regression line to this plot to see whether any trend remains in the data.
plot(beer_diff_seas, col="blue", main = "After stationary differencing")
abline(reg=lm(beer_diff_seas~ time(beer_diff_seas)),col="red")
ndiffs(beer_diff_seas)
## [1] 1
#This means we have to do 1 ordinary (trend) difference
beer_diff_seas_trend = diff(beer_diff_seas, differences=1)
plot(beer_diff_seas_trend, col = "green", main ="After Trend and Seasonality Differencing")
#Let's test whether the time series is stationary after the trend difference
kpss.test(beer_diff_seas_trend)
## Warning in kpss.test(beer_diff_seas_trend): p-value greater than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: beer_diff_seas_trend
## KPSS Level = 0.056024, Truncation lag parameter = 1, p-value = 0.1
The p-value is 0.1 > 0.05, so we cannot reject the null hypothesis that the data is stationary. The differenced series is therefore stationary.
The classical method of time series decomposition originated in the 1920s and was widely used until the 1950s. It still forms the basis of later time series methods, so it is important to understand how it works. The first step in a classical decomposition is to use a moving average to estimate the trend.
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
beer_ts = ts(beer,frequency=1, start=c(2012))
plot(beer_ts,col="green", main="Beer Consumption")
abline(reg=lm(beer_ts~ time(beer_ts)),col="red")
#There is trend and seasonality in this time series data
#Now let's smooth the series with a moving average
MA = ma(beer_ts,5)
plot.ts(beer_ts, col="green", main = "Beer Consumption")
lines(MA,col="blue")
MA1 = ma(beer_ts,7)
plot.ts(beer_ts, col="green", main = "Beer Consumption")
lines(MA1,col="blue")
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
str(beer)
## 'data.frame': 72 obs. of 1 variable:
## $ OzBeer: num 284 213 227 308 262 ...
beer = ts(beer,frequency=4)
plot.ts(OzBeer,col="green",main="Quarterly Beer Sales")
MA = ma(OzBeer,4,T)
#centering is controlled by setting the centre parameter to TRUE (T) or FALSE (F)
lines(MA,col="red")
#Detrending can be done in 3 ways: 1. fit a linear regression line (see the sketch below), 2. use a moving average, 3. apply differencing
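A hedged sketch of option 1, fit-and-subtract a linear trend; the analysis below sticks to moving averages and differencing, so this is illustrative only.
y = as.numeric(beer_ts)                                      # drop ts attributes for a plain regression
t_index = as.numeric(time(beer_ts))
trend_fit = lm(y ~ t_index)                                  # linear trend in time
detrended = ts(residuals(trend_fit), start = start(beer_ts), frequency = frequency(beer_ts))
plot(detrended, col = "blue", main = "Series after removing a fitted linear trend")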
Generally, two types of models are used for modelling time series: Additive Model = Trend + Seasonality + Random; Multiplicative Model = Trend * Seasonality * Random.
The seasonal variation looks roughly constant; it does not change as the level of the time series increases, so we should use the additive model.
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from beer (pos = 4):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
ts_beer = ts(beer, frequency = 4)
plot(ts_beer, col="blue",main="Beer sales every quarter")
In this plot the seasonal variation looks constant, so this is an additive time series.
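One quick, hedged check of that claim (not part of the original analysis): if the within-cycle swing grew with the level of the series, a multiplicative model might be more appropriate.
yearly_swing = aggregate(ts_beer, FUN = function(x) diff(range(x)))   # max - min within each 4-quarter cycle
yearly_swing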
To perform the decomposition, it is vital to use a moving window of the exact size of the seasonality. Therefore, to decompose a time series we need to know the seasonality period: weekly, monthly, etc. Using a periodogram in R we can detect the seasonality.
p = periodogram(OzBeer)
From the periodogram we can see two peaks: one near 0.0 and another at 0.25.
Step 2A: Detecting Seasonality Mathematically
The signal is strongest at the very beginning and again at a frequency of 0.25. The relationship between periodicity and frequency is Periodicity = 1/Frequency, so Periodicity = 1/0.25 = 4.
dd = data.frame(freq=p$freq,spec=p$spec)
order = dd[order(-dd$spec),]
top2 = head(order,2)
# display the 2 highest "power" frequencies
top2
## freq spec
## 1 0.01388889 175272.3
## 18 0.25000000 105281.0
#So far we have identified the top 2 frequencies corresponding to the top 2 Signals
# convert frequency to time periods
time = 1/top2$freq
time
## [1] 72 4
The highest signal in the periodogram occurs at a frequency of 0.01388889, which corresponds to 72 quarters, i.e. the entire data set, so we do not use it. The next highest signal is at a frequency of 0.25, which corresponds to 4 quarters; the plot also shows a pattern repeating every 4 data points, giving a periodicity of 4 quarters. Australian beer production clearly follows an annual seasonality recorded quarterly, i.e. 4 observations per year, so we use a moving average window of 4.
beer_trend = ma(ts_beer,order = 4, centre = T)
plot(ts_beer, col="blue", main ="Quartely Beer Sales in Australia")
lines(beer_trend, col="green")
plot(beer_trend, col = "green", main = "Trend Line in Time Series")
Removing the previously calculated trend from the time series results in a new time series that clearly exposes the seasonality.
detrend_beer = ts_beer - beer_trend
plot(detrend_beer, col="blue", main = "Time Series after removing the Trend Component")
#Seasonality: we have to determine the seasonal part of the time series and remove it from the detrended series.
#The detrended series is reshaped so that the 4 columns represent Q1, Q2, Q3 and Q4; with 18 years of data this gives an 18 x 4 matrix.
#The average of each column gives a SEQUENCE of 4 averages representing Q1, Q2, Q3 and Q4.
#Repeating this SEQUENCE 18 times forms the seasonal component of the time series.
m_beer = t(matrix(data = detrend_beer, nrow = 4))
m_beer
## [,1] [,2] [,3] [,4]
## [1,] NA NA -28.4250 53.9875
## [2,] 4.5500 -32.2000 -26.7375 55.7125
## [3,] 6.4875 -31.8500 -25.4625 53.0000
## [4,] 0.1375 -36.1875 -16.2875 45.0625
## [5,] 15.5875 -44.9625 -11.7750 37.3875
## [6,] 20.3250 -44.8500 -21.7750 60.0250
## [7,] -3.8000 -35.4875 -18.1375 55.3125
## [8,] 1.8375 -40.4375 -22.3000 58.8875
## [9,] 3.8000 -40.3875 -16.0250 49.3250
## [10,] 8.0250 -37.7750 -22.3000 57.3250
## [11,] 6.1000 -43.4250 -27.3500 60.9375
## [12,] 5.8500 -33.2375 -29.7750 44.7500
## [13,] 32.3375 -46.7125 -42.0625 72.8875
## [14,] 7.8000 -48.4500 -19.5375 61.3250
## [15,] -2.7000 -36.1625 -25.1750 62.9750
## [16,] 4.1750 -38.8500 -33.8500 72.4875
## [17,] 2.0500 -44.6125 -35.8000 71.2625
## [18,] 17.9625 -59.6625 NA NA
Step 4: Seasonal Variation
seasonal_beer = colMeans(m_beer, na.rm = T)
seasonal_beer
## [1] 7.677941 -40.897059 -24.869118 57.214706
seasonal = rep(seasonal_beer,18)
plot.ts(seasonal, col="blue", main="Seasonal Component of Time Series")
random = detrend_beer-seasonal
plot(random,col="Red", main = "Random Noise")
#Step 6: Reconstruct the time series
reconstruct = beer_trend + seasonal + random
plot(reconstruct,col="Blue", main = "Getting the original time series")
The decomposed time series can be re-composed using the model formula, which should reproduce the original signal. Some values are missing at the beginning and the end of the recomposed series; this is because the moving average window needs to ingest several data points before it can produce an average. Using decompose(): decompose() splits a time series into seasonal, trend and irregular components using moving averages, and handles either an additive or a multiplicative seasonal component.
beer=read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from beer (pos = 4):
##
## OzBeer
## The following object is masked from beer (pos = 5):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
ts_beer = ts(beer, frequency = 4)
decompose_beer = decompose(ts_beer, "additive")
plot(as.ts(decompose_beer$seasonal))
plot(as.ts(decompose_beer$trend))
plot(as.ts(decompose_beer$random))
plot(decompose_beer)
Simple moving average method assigns equal weights (1/k) to all k data points. Arguably, recent observations provide more relevant information than do observations in the past. So we want a weighting scheme that assigns decreasing weights to the more distant observations. Exponential smoothing methods give larger weights to more recent observations, and the weights decrease exponentially as the observations become more distant. These methods are most effective when the parameters describing the time series are changing SLOWLY over time.
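A small illustration of the weighting described above (toy numbers, not the beer data): with smoothing parameter alpha, the weight on the observation k periods ago is alpha * (1 - alpha)^k, which decays exponentially.
alpha = 0.3                                # hypothetical smoothing parameter, chosen only for illustration
weights = alpha * (1 - alpha)^(0:9)        # weight on the observation k periods ago, k = 0..9
round(weights, 4)
barplot(weights, names.arg = 0:9, xlab = "periods ago (k)", ylab = "weight",
        main = "Exponentially decaying weights")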
Two Holt-Winters methods are designed for time series that exhibit a linear trend: the additive Holt-Winters method, used for time series with constant (additive) seasonal variations, and the multiplicative Holt-Winters method, used for time series with increasing (multiplicative) seasonal variations. Holt-Winters is an exponential smoothing approach for handling SEASONAL data. The additive method is preferred when the seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations change in proportion to the level of the series.
In our case, the beer data is a time series with roughly constant seasonal variations, so we will use the additive Holt-Winters method.
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from beer (pos = 4):
##
## OzBeer
## The following object is masked from beer (pos = 5):
##
## OzBeer
## The following object is masked from beer (pos = 6):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
beer_ts = ts(beer,frequency=4)
plot(beer_ts,col="Blue", main = "beer Sale")
fit = hw(beer_ts, seasonal="additive")
plot(beer_ts,col="Blue", main = "Beer Sale")
lines(fitted(fit),col="red")
fitted(fit)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1 264.5711 222.4177 235.8788 314.6716
## 2 269.1815 219.2323 232.9907 312.6972
## 3 268.0774 222.4238 236.2082 317.5723
## 4 272.1912 227.0247 238.0433 318.7928
## 5 272.0273 230.6423 244.4301 323.1572
## 6 280.1063 235.9443 254.1045 327.2834
## 7 291.0935 242.5409 262.7210 338.2571
## 8 297.8437 253.8770 274.2161 349.9664
## 9 308.1938 266.3948 288.5314 367.9393
## 10 325.1499 284.5868 308.4243 385.7316
## 11 343.9186 302.0324 323.1390 399.0237
## 12 354.2837 310.6274 332.0563 411.6082
## 13 365.1922 324.7481 343.7037 422.8812
## 14 384.8971 336.8464 353.6417 441.2810
## 15 398.9884 349.6714 369.6807 456.7315
## 16 411.8143 367.3324 387.0117 474.6557
## 17 427.8514 383.8941 400.7029 491.7647
## 18 440.7737 398.7928 415.8050 512.9939
forecast(fit,8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 19 Q1 463.6812 451.2014 476.1609 444.5951 482.7672
## 19 Q2 416.0852 403.5665 428.6039 396.9395 435.2309
## 19 Q3 438.6314 426.0255 451.2373 419.3523 457.9105
## 19 Q4 536.0253 523.2657 548.7849 516.5112 555.5394
## 20 Q1 484.0115 470.3853 497.6376 463.1721 504.8509
## 20 Q2 436.4155 422.4713 450.3597 415.0897 457.7413
## 20 Q3 458.9617 444.5959 473.3275 436.9911 480.9323
## 20 Q4 556.3556 541.4571 571.2541 533.5704 579.1409
plot(forecast(fit,8))
fit$model
## Holt-Winters' additive method
##
## Call:
## hw(y = beer_ts, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.0395
## beta = 0.0395
## gamma = 0.1854
##
## Initial states:
## l = 258.2137
## b = 0.0239
## s=55.077 -23.9983 -37.4121 6.3335
##
## sigma: 9.738
##
## AIC AICc BIC
## 653.6687 656.5719 674.1587
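As a quick, hedged sanity check on the additive choice (not part of the original analysis), one could also fit the multiplicative variant and compare information criteria, where a lower AIC indicates a better fit.
fit_mult = hw(beer_ts, seasonal = "multiplicative")   # multiplicative alternative, for comparison only
fit$model$aic                                         # AIC of the additive fit above
fit_mult$model$aic                                    # AIC of the multiplicative fit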
states = fit$model$states[,1:3]
colnames(states)= cbind('level','trend','seasonality')
plot(states,col="blue",main = "Decomposing the forecast")
Autocorrelation is the correlation of a time series with lags of itself. It shows whether the previous states (lagged observations) of the time series have an influence on the current state. In the autocorrelation chart, if the autocorrelation crosses the dashed blue line, that lag is significantly correlated with the current series. It is commonly used to determine whether the time series is stationary: for a stationary time series the autocorrelation falls to zero fairly quickly, while for a non-stationary series it drops off gradually.
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from beer (pos = 4):
##
## OzBeer
## The following object is masked from beer (pos = 5):
##
## OzBeer
## The following object is masked from beer (pos = 6):
##
## OzBeer
## The following object is masked from beer (pos = 7):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
acf=acf(OzBeer)
Partial autocorrelation is the correlation of the time series with a lag of itself, with the linear dependence of all the lags between them removed. The partial autocorrelation function is a measure of the correlation between observations of a time series that are separated by k time units (yt and yt-k), after adjusting for the presence of all the other terms of shorter lag (yt-1, yt-2, ..., yt-k+1).
pacf=pacf(OzBeer)
Use the partial autocorrelation and autocorrelation functions together to identify ARIMA models. Examine the spikes at each lag to determine whether they are significant; a significant spike extends beyond the significance limits, which indicates that the correlation for that lag does not equal zero. PACF Case 1: a large spike at lag 1 that decreases after a few lags indicates a moving average (MA) term in the data; use the autocorrelation function (ACF) to determine the order of the moving average term.
We cannot tell exactly which model is best, so we create several models by varying p and q and use the AIC value to find which model is best. In ARIMA(p,d,q), p and q determine the number of autoregressive and error (moving average) terms in the model we are trying to fit, and they are estimated using the autocorrelation and partial autocorrelation functions/plots. It is here that modelling becomes an "art": choosing the values of p and q.
beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
##
## OzBeer
## The following object is masked from beer (pos = 4):
##
## OzBeer
## The following object is masked from beer (pos = 5):
##
## OzBeer
## The following object is masked from beer (pos = 6):
##
## OzBeer
## The following object is masked from beer (pos = 7):
##
## OzBeer
## The following object is masked from beer (pos = 8):
##
## OzBeer
## The following object is masked from stationary:
##
## OzBeer
## The following object is masked from TSA1:
##
## OzBeer
beer_ts = ts(OzBeer,frequency = 4, start = c(2010))
plot(beer_ts, col="Blue", main="Time Series of Beer")
1. There is a trend component which grows beer consumption year by year.
2. There looks to be a seasonal component with a cycle of about 4 quarters (one year).
3. The variance in the data is roughly constant over time.
nsdiffs(beer_ts)
## [1] 1
# we have to do 1 differencing to remove seasonality
ndiffs(beer_ts)
## [1] 1
# we have to do 1 differencing to remove trend
kpss.test(beer_ts)
## Warning in kpss.test(beer_ts): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: beer_ts
## KPSS Level = 3.0456, Truncation lag parameter = 1, p-value = 0.01
#The p-value is 0.01, which is less than 0.05, so reject the null hypothesis
#so the data is not stationary
kpss.test(diff(beer_ts))
## Warning in kpss.test(diff(beer_ts)): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(beer_ts)
## KPSS Level = 0.036853, Truncation lag parameter = 1, p-value = 0.1
#Now the p-value is 0.1, which is greater than 0.05, so we cannot reject the null hypothesis
#so the data is stationary now
The next step is to find the right parameters for the ARIMA model. We already know that the 'd' component is 1, as we need one difference to make the series stationary. We estimate p and q using the correlation plots.
acf((beer_ts))
#The slowly decaying ACF confirms the raw series is not stationary
#As discussed, we have to use the differenced series when identifying the ARIMA orders
acf(diff(beer_ts))
pacf(diff(beer_ts))
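Before the manual search over p and q below, a hedged alternative is to let forecast::auto.arima() propose the orders automatically; it searches over (p,d,q) and the seasonal orders and keeps the model with the lowest AICc. This is shown for comparison only and is not part of the original model selection.
auto_fit = auto.arima(beer_ts, seasonal = TRUE)   # automatic order selection on the quarterly series
auto_fit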
fit = arima(diff(beer_ts), c(0, 1, 1),seasonal = list(order = c(0, 1, 1), period = 4))
fit
##
## Call:
## arima(x = diff(beer_ts), order = c(0, 1, 1), seasonal = list(order = c(0, 1,
## 1), period = 4))
##
## Coefficients:
## ma1 sma1
## -1.0000 -0.7573
## s.e. 0.0509 0.0984
##
## sigma^2 estimated as 214.2: log likelihood = -275.75, aic = 555.51
fit1 = arima((beer_ts), c(0, 1, 2),seasonal = list(order = c(0, 1, 1), period = 4))
fit1
##
## Call:
## arima(x = (beer_ts), order = c(0, 1, 2), seasonal = list(order = c(0, 1, 1),
## period = 4))
##
## Coefficients:
## ma1 ma2 sma1
## -1.0724 0.3285 -0.6293
## s.e. 0.1170 0.1135 0.1093
##
## sigma^2 estimated as 104.8: log likelihood = -252.58, aic = 511.16
fit2 = arima((beer_ts), c(0, 1, 3),seasonal = list(order = c(0, 1, 3), period = 4))
fit2
##
## Call:
## arima(x = (beer_ts), order = c(0, 1, 3), seasonal = list(order = c(0, 1, 3),
## period = 4))
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2 sma3
## -1.0678 0.2795 0.0473 -0.7178 -0.0354 0.1847
## s.e. 0.1427 0.2285 0.1543 0.1698 0.1778 0.1636
##
## sigma^2 estimated as 101.4: log likelihood = -251.86, aic = 515.72
fit3 = arima((beer_ts), c(1, 1, 2),seasonal = list(order = c(1, 1, 2), period = 4))
fit3
##
## Call:
## arima(x = (beer_ts), order = c(1, 1, 2), seasonal = list(order = c(1, 1, 2),
## period = 4))
##
## Coefficients:
## ar1 ma1 ma2 sar1 sma1 sma2
## 0.0208 -1.0743 0.3167 -0.8662 0.3591 -0.6409
## s.e. 0.3970 0.3600 0.2875 0.1210 0.1551 0.1325
##
## sigma^2 estimated as 98.18: log likelihood = -251.86, aic = 515.72
fit4 = arima((beer_ts), c(2, 1, 2),seasonal = list(order = c(2, 1, 2), period = 4))
fit4
##
## Call:
## arima(x = (beer_ts), order = c(2, 1, 2), seasonal = list(order = c(2, 1, 2),
## period = 4))
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sar2 sma1 sma2
## -0.4562 -0.2416 -0.6039 0.0234 -0.9597 -0.1059 0.4170 -0.5829
## s.e. 0.7524 0.2474 0.7519 0.5947 0.2322 0.2289 0.1995 0.1749
##
## sigma^2 estimated as 95.61: log likelihood = -251.31, aic = 518.61
#Of the models above, based on the AIC criterion we go ahead with fit1, which has the lowest AIC of 511.16
fit1
##
## Call:
## arima(x = (beer_ts), order = c(0, 1, 2), seasonal = list(order = c(0, 1, 1),
## period = 4))
##
## Coefficients:
## ma1 ma2 sma1
## -1.0724 0.3285 -0.6293
## s.e. 0.1170 0.1135 0.1093
##
## sigma^2 estimated as 104.8: log likelihood = -252.58, aic = 511.16
fit1$aic
## [1] 511.1552
The Ljung-Box test is carried out on the residuals to check that what remains after fitting the model is just uncorrelated noise. Null hypothesis: the data are independently distributed. Alternative hypothesis: the data show serial correlation.
Box.test(residuals(fit1),lag=4,type="Ljung")
##
## Box-Ljung test
##
## data: residuals(fit1)
## X-squared = 1.135, df = 4, p-value = 0.8887
The p-value is > 0.05, so we fail to reject the null hypothesis that the residuals are independently distributed.
pred <- predict(fit1, n.ahead = 8)
ts.plot(beer_ts, pred$pred, lty = c(1, 3), ylab = "beer", main = "Forecasted Series")