library('TSA')
## Loading required package: leaps
## Loading required package: locfit
## locfit 1.5-9.1    2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
## Loading required package: tseries
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library('tseries')
library('forecast')
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
## 
##     getResponse
TSA1 = read.csv("beer_assignment.csv")
head(TSA1)
##   OzBeer
## 1  284.4
## 2  212.8
## 3  226.9
## 4  308.4
## 5  262.0
## 6  227.9
attach(TSA1)

timeseries = ts(TSA1,frequency =12,start = c(2012,1))
timeseries
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
## 2012 284.4 212.8 226.9 308.4 262.0 227.9 236.1 320.4 271.9 232.8 237.0
## 2013 261.4 226.8 249.9 314.3 286.1 226.5 260.4 311.4 294.7 232.6 257.2
## 2014 279.1 249.8 269.8 345.7 293.8 254.7 277.5 363.4 313.4 272.8 300.1
## 2015 330.8 287.8 305.9 386.1 335.2 288.0 308.3 402.3 352.8 316.1 324.9
## 2016 393.0 318.9 327.0 442.3 383.1 331.6 361.4 445.9 386.6 357.2 373.6
## 2017 409.6 369.8 378.6 487.0 419.2 376.7 392.8 506.1 458.4 387.4 426.9
##        Dec
## 2012 313.4
## 2013 339.2
## 2014 369.5
## 2015 404.8
## 2016 466.2
## 2017 525.0
is.ts(timeseries)
## [1] TRUE
plot.ts(timeseries,col="blue",main="Sales data time series with timestamp")
abline(reg=lm(timeseries~ time(timeseries)),col="red")

#This shows there is a clear trend and seasonality in this data set

Data analysis

Annual_beer = aggregate(timeseries)
str(Annual_beer)
##  Time-Series [1:6, 1] from 2012 to 2017: 3134 3260 3590 4043 4587 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "OzBeer"
summary(Annual_beer)
##      OzBeer    
##  Min.   :3134  
##  1st Qu.:3343  
##  Median :3816  
##  Mean   :3959  
##  3rd Qu.:4451  
##  Max.   :5138
#What does the year-on-year increase in annual beer consumption look like?
plot(Annual_beer,col="Blue", main = "Annual beer consumption year on year")

#Evaluate the average increase in Beer year on year
Average_beer = aggregate(timeseries, FUN = mean)
Average_beer
## Time Series:
## Start = 2012 
## End = 2017 
## Frequency = 1 
##        OzBeer
## [1,] 261.1667
## [2,] 271.7083
## [3,] 299.1333
## [4,] 336.9167
## [5,] 382.2333
## [6,] 428.1250
plot(Average_beer,col="Blue", main = "Average beer consumption year on year")

#Evaluate the variance across months in a cycle and within months over multiple cycles
boxplot(timeseries~cycle(timeseries), main="Variation between months and between years")

Test for Stationarity

The Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test is used to test the null hypothesis that an observable time series is stationary. Here the null hypothesis is that the time series is stationary.

stationary = read.csv("beer_assignment.csv",1)
attach(stationary)
## The following object is masked from TSA1:
## 
##     OzBeer
beer = ts(stationary, frequency =12)
plot(beer, col="blue", main = "Before stationary differencing")

kpss.test(beer)
## Warning in kpss.test(beer): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  beer
## KPSS Level = 3.0456, Truncation lag parameter = 1, p-value = 0.01

The p-value is less than 0.05, so we reject the null hypothesis that the data is stationary. This time series is therefore not stationary.

Removing seasonal variation to achieve stationarity

Where seasonal variation is present, one way to remove it is to take a seasonal difference. These are also called “lag-m differences” as we subtract the observation after a lag of m periods.
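
As a quick numerical illustration (a minimal sketch on a made-up quarterly vector, not the beer data), a lag-4 difference simply subtracts the value four periods earlier:

x = c(10, 20, 30, 40, 12, 22, 33, 41)   #two hypothetical years of quarterly values
diff(x, lag = 4)                        #x[t] - x[t-4], giving 2 2 3 1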

Generating Stationary Timeseries

R has built-in functions that tell us how many times we have to apply trend and seasonal differencing; nsdiffs estimates the number of seasonal differences required.

nsdiffs(beer)
## [1] 1
#This means we have to apply one seasonal difference

beer_diff_seas = diff(beer, lag = frequency(beer),differences = 1)
plot(beer_diff_seas, col="blue", main = "After stationary differencing")

#Visually the data still does not look stationary. Let's check mathematically whether it is stationary

kpss.test(beer_diff_seas)
## Warning in kpss.test(beer_diff_seas): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  beer_diff_seas
## KPSS Level = 2.2781, Truncation lag parameter = 1, p-value = 0.01
#The p-value is 0.01 < 0.05, so reject the null hypothesis.
#The null hypothesis is that the data is stationary, which means the data is still not stationary
#Let's add a linear regression line to this graph to see whether there is any trend left in the data

plot(beer_diff_seas, col="blue", main = "After stationary differencing")
abline(reg=lm(beer_diff_seas~ time(beer_diff_seas)),col="red")

Identifying Trend Difference

ndiffs(beer_diff_seas)
## [1] 1
#This means we have to apply one trend (first-order) difference

beer_diff_seas_trend = diff(beer_diff_seas, differences=1)
plot(beer_diff_seas_trend, col = "green", main ="After Trend and Seasonality Differencing")

#Let's test whether the time series is stationary after the trend difference
kpss.test(beer_diff_seas_trend)
## Warning in kpss.test(beer_diff_seas_trend): p-value greater than printed p-
## value
## 
##  KPSS Test for Level Stationarity
## 
## data:  beer_diff_seas_trend
## KPSS Level = 0.056024, Truncation lag parameter = 1, p-value = 0.1

The p-value is 0.1 > 0.05, so we cannot reject the null hypothesis. The null hypothesis is that the data is stationary, which means the data is now stationary.

Moving Averages & Smoothing

The classical method of time series decomposition originated in the 1920s and was widely used until the 1950s. It still forms the basis of later time series methods, so it is important to understand how it works. The first step in a classical decomposition is to use a moving average to estimate the trend.

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
beer_ts = ts(beer,frequency=1, start=c(2012))
plot(beer_ts,col="green", main="Beer Consumption")
abline(reg=lm(beer_ts~ time(beer_ts)),col="red")

#There is trend and seasonality in this Time Series data

#now lets check through Moving Average
MA = ma(beer_ts,5)
plot.ts(beer_ts, col="green", main = "Beer Consumption")
lines(MA,col="blue")

MA1 = ma(beer_ts,7)
plot.ts(beer_ts, col="green", main = "Beer Consumption")
lines(MA1,col="blue")

Centered Moving Averages

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
str(beer)
## 'data.frame':    72 obs. of  1 variable:
##  $ OzBeer: num  284 213 227 308 262 ...
beer = ts(beer,frequency=4)
plot.ts(OzBeer,col="green",main="Quarterly Beer Sales")
MA = ma(OzBeer,4,T) 
#centering is controlled by setting the centre parameter to TRUE (T) or FALSE (F)
lines(MA,col="red")

#De-trending can be done in 3 ways: 1. fit a linear regression line, 2. use a moving average, 3. apply differencing (a regression sketch follows below)
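
The linear-regression route is not demonstrated elsewhere in this document, so here is a minimal sketch (it reuses the beer_ts object defined just above; the names trend_fit and detrended_beer are my own):

#Hypothetical sketch: de-trend by fitting a straight line and keeping the residuals
trend_fit = lm(beer_ts ~ time(beer_ts))      #linear trend in time
detrended_beer = residuals(trend_fit)        #what is left after removing the fitted line
plot.ts(detrended_beer, col="blue", main="Beer sales after removing a linear trend")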

Modelling Time Series

Generally, two types of models are used for modelling time series:
Additive Model = Trend + Seasonality + Random
Multiplicative Model = Trend * Seasonality * Random
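
To make the distinction concrete, here is a minimal sketch using made-up trend, seasonal and random components (not the beer data) so the two forms can be plotted side by side:

#Hypothetical components for illustration only
trend_c = 1:40                                      #steadily increasing level
seas_add = rep(c(5, -5, -10, 10), times = 10)       #additive seasonal pattern
seas_mult = rep(c(1.2, 0.9, 0.8, 1.1), times = 10)  #multiplicative seasonal pattern
random_c = rnorm(40, mean = 0, sd = 1)

additive = trend_c + seas_add + random_c                   #Trend + Seasonality + Random
multiplicative = trend_c * seas_mult * (1 + random_c/20)   #Trend * Seasonality * Random (random factor kept close to 1)

par(mfrow = c(2,1))
plot.ts(additive, col="blue", main="Additive: seasonal swings stay constant")
plot.ts(multiplicative, col="green", main="Multiplicative: seasonal swings grow with the level")
par(mfrow = c(1,1))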

Step 1: Identifying an Additive Timeseries

The seasonal variation looks constant; it does not change as the level of the time series increases, so we should use the additive model.

Time Series = Trend + Seasonal + Random

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from beer (pos = 4):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
ts_beer = ts(beer, frequency = 4)
plot(ts_beer, col="blue",main="Beer sales every quarter")

In this plot the seasonal variation looks constant, so this is an additive time series.

Step 2A: Detecting Seasonality

To perform the decomposition, it is vital to use a moving window of the exact size of the seasonality. Therefore, to decompose a time series we need to know the seasonality period: weekly, monthly, etc. Using a “periodogram” in R we can detect the seasonality.

p = periodogram(OzBeer)

From the diagram we can see two peaks, one near 0.0 and another at 0.25. As we know, Time = 1/Frequency, so Time = 1/0.25 = 4.

Step 2A: Detecting Seasonality Mathematically

The signal is strongest at the very start of the periodogram and then again at a frequency of 0.25. The relationship between periodicity and frequency is Periodicity = 1/Frequency, so Periodicity = 1/0.25 = 4.

dd = data.frame(freq=p$freq,spec=p$spec)
order = dd[order(-dd$spec),]
top2 = head(order,2)
# display the 2 highest "power" frequencies
top2
##          freq     spec
## 1  0.01388889 175272.3
## 18 0.25000000 105281.0
#So far we have identified the top 2 frequencies corresponding to the top 2 Signals
# convert frequency to time periods
time = 1/top2$freq
time
## [1] 72  4

The highest signal in the periodogram occurs at a frequency of 0.01388889, which corresponds to 72 quarters, i.e. the entire data set, so we do not want to use that one. The next highest signal is detected at a frequency of 0.25, which corresponds to 4 quarters; the plot also shows a seasonal pattern repeating every 4 data points. This gives a periodicity of 4 quarters. The Australian beer production clearly follows an annual seasonality with data recorded quarterly, i.e. 4 times a year, so we use a moving average window of 4.

Step 2B: Visualizing the Trend

beer_trend = ma(ts_beer,order = 4, centre = T)
plot(ts_beer, col="blue", main ="Quarterly Beer Sales in Australia")
lines(beer_trend, col="green")

plot(beer_trend, col = "green", main = "Trend Line in Time Series")

Step 3: Detrending time series

Removing the previously calculated trend from the time series will result in a new time series that clearly exposes the seasonality.

detrend_beer = ts_beer - beer_trend
plot(detrend_beer, col="blue", main = "Time Series after removing the Trend Component")

#Seasonality: we have to determine the seasonal part of the time series and remove it from the detrended series. The detrended values are arranged into a matrix whose columns represent Q1, Q2, Q3 and Q4; with 18 years of data this gives an 18 x 4 matrix. The average of each column is computed, which gives a SEQUENCE of 4 averages representing Q1, Q2, Q3 and Q4. We REPEAT this SEQUENCE 18 times and this forms the seasonal component of the time series

m_beer = t(matrix(data = detrend_beer, nrow = 4))
m_beer
##          [,1]     [,2]     [,3]    [,4]
##  [1,]      NA       NA -28.4250 53.9875
##  [2,]  4.5500 -32.2000 -26.7375 55.7125
##  [3,]  6.4875 -31.8500 -25.4625 53.0000
##  [4,]  0.1375 -36.1875 -16.2875 45.0625
##  [5,] 15.5875 -44.9625 -11.7750 37.3875
##  [6,] 20.3250 -44.8500 -21.7750 60.0250
##  [7,] -3.8000 -35.4875 -18.1375 55.3125
##  [8,]  1.8375 -40.4375 -22.3000 58.8875
##  [9,]  3.8000 -40.3875 -16.0250 49.3250
## [10,]  8.0250 -37.7750 -22.3000 57.3250
## [11,]  6.1000 -43.4250 -27.3500 60.9375
## [12,]  5.8500 -33.2375 -29.7750 44.7500
## [13,] 32.3375 -46.7125 -42.0625 72.8875
## [14,]  7.8000 -48.4500 -19.5375 61.3250
## [15,] -2.7000 -36.1625 -25.1750 62.9750
## [16,]  4.1750 -38.8500 -33.8500 72.4875
## [17,]  2.0500 -44.6125 -35.8000 71.2625
## [18,] 17.9625 -59.6625       NA      NA

Step 4: Seasonal Variation

seasonal_beer = colMeans(m_beer, na.rm = T)
seasonal_beer
## [1]   7.677941 -40.897059 -24.869118  57.214706
seasonal = rep(seasonal_beer,18)
plot.ts(seasonal, col="blue", main="Seasonal Component of Time Series")

Step 5: Random Noise

random = detrend_beer-seasonal
plot(random,col="Red", main = "Random Noise")

#Step 6: Reconstruct the time series

reconstruct = beer_trend + seasonal + random
plot(reconstruct,col="Blue", main = "Getting the original time series")

Comparing original with reconstructed

The decomposed time series can logically be re-composed using the model formula, and this should reproduce the original signal. Some values are missing at the beginning and the end of the recomposed series; this is because the moving average window needs to ingest several data points before it can produce an averaged value.

Using decompose()

decompose() splits a time series into seasonal, trend and irregular components using moving averages, and handles either an additive or a multiplicative seasonal component.

beer=read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from beer (pos = 4):
## 
##     OzBeer
## The following object is masked from beer (pos = 5):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
ts_beer = ts(beer, frequency = 4)
decompose_beer = decompose(ts_beer, "additive")
plot(as.ts(decompose_beer$seasonal))

plot(as.ts(decompose_beer$trend))

plot(as.ts(decompose_beer$random))

plot(decompose_beer)

Exponential Smoothing

The simple moving average method assigns equal weights (1/k) to all k data points. Arguably, recent observations provide more relevant information than observations further in the past, so we want a weighting scheme that assigns decreasing weights to the more distant observations. Exponential smoothing methods give larger weights to more recent observations, and the weights decrease exponentially as the observations become more distant. These methods are most effective when the parameters describing the time series are changing SLOWLY over time.
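
As a quick numerical check of the “exponentially decreasing weights” claim, here is a sketch with an assumed smoothing parameter alpha = 0.3 (not a value fitted to the beer data):

#Weights implied by simple exponential smoothing: weight at lag k is alpha*(1-alpha)^k
alpha = 0.3                         #assumed smoothing parameter
k = 0:9                             #lag 0 is the most recent observation
weights = alpha * (1 - alpha)^k
round(weights, 3)                   #0.300 0.210 0.147 0.103 0.072 ... decaying geometrically
plot(k, weights, type="h", col="blue", main="Exponential smoothing weights decay with lag")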

Part A : Holt-Winters Methods

Two Holt-Winters methods are designed for time series that exhibit a linear trend. The additive Holt-Winters method is used for time series with constant (additive) seasonal variations, while the multiplicative Holt-Winters method is used for time series with increasing (multiplicative) seasonal variations. Holt-Winters is an exponential smoothing approach for handling SEASONAL data. The additive method is preferred when the seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations change proportionally to the level of the series.

In our case, the beer data is a time series with constant seasonal variations, so we will use the additive Holt-Winters method.
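
For comparison, a multiplicative fit only changes the seasonal argument of hw(). A hedged sketch (it rebuilds the quarterly series itself, and beer_q / fit_mult are my own names; the analysis below uses the additive form):

#Hypothetical comparison fit: multiplicative Holt-Winters on the same quarterly series
beer_q = ts(read.csv("beer_assignment.csv",1), frequency = 4)
fit_mult = hw(beer_q, seasonal = "multiplicative")
fit_mult$model     #its AIC/AICc/BIC can be compared against the additive fit reported below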

Beer : Time Series

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from beer (pos = 4):
## 
##     OzBeer
## The following object is masked from beer (pos = 5):
## 
##     OzBeer
## The following object is masked from beer (pos = 6):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
beer_ts = ts(beer,frequency=4)
plot(beer_ts,col="Blue", main = "beer Sale")

Beer Sales: Original + Forecast

fit = hw(beer_ts, seasonal="additive")
plot(beer_ts,col="Blue", main = "Beer Sale")
lines(fitted(fit),col="red")

fitted(fit)
##        Qtr1     Qtr2     Qtr3     Qtr4
## 1  264.5711 222.4177 235.8788 314.6716
## 2  269.1815 219.2323 232.9907 312.6972
## 3  268.0774 222.4238 236.2082 317.5723
## 4  272.1912 227.0247 238.0433 318.7928
## 5  272.0273 230.6423 244.4301 323.1572
## 6  280.1063 235.9443 254.1045 327.2834
## 7  291.0935 242.5409 262.7210 338.2571
## 8  297.8437 253.8770 274.2161 349.9664
## 9  308.1938 266.3948 288.5314 367.9393
## 10 325.1499 284.5868 308.4243 385.7316
## 11 343.9186 302.0324 323.1390 399.0237
## 12 354.2837 310.6274 332.0563 411.6082
## 13 365.1922 324.7481 343.7037 422.8812
## 14 384.8971 336.8464 353.6417 441.2810
## 15 398.9884 349.6714 369.6807 456.7315
## 16 411.8143 367.3324 387.0117 474.6557
## 17 427.8514 383.8941 400.7029 491.7647
## 18 440.7737 398.7928 415.8050 512.9939

Beer Sales: Original + Forecast + Prediction Intervals

forecast(fit,8)
##       Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 19 Q1       463.6812 451.2014 476.1609 444.5951 482.7672
## 19 Q2       416.0852 403.5665 428.6039 396.9395 435.2309
## 19 Q3       438.6314 426.0255 451.2373 419.3523 457.9105
## 19 Q4       536.0253 523.2657 548.7849 516.5112 555.5394
## 20 Q1       484.0115 470.3853 497.6376 463.1721 504.8509
## 20 Q2       436.4155 422.4713 450.3597 415.0897 457.7413
## 20 Q3       458.9617 444.5959 473.3275 436.9911 480.9323
## 20 Q4       556.3556 541.4571 571.2541 533.5704 579.1409
plot(forecast(fit,8))

Model Analysis

fit$model
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = beer_ts, seasonal = "additive") 
## 
##   Smoothing parameters:
##     alpha = 0.0395 
##     beta  = 0.0395 
##     gamma = 0.1854 
## 
##   Initial states:
##     l = 258.2137 
##     b = 0.0239 
##     s=55.077 -23.9983 -37.4121 6.3335
## 
##   sigma:  9.738
## 
##      AIC     AICc      BIC 
## 653.6687 656.5719 674.1587

Beer Sale: Decomposed

states = fit$model$states[,1:3]
colnames(states)= cbind('level','trend','seasonality')
plot(states,col="blue",main = "Decomposing the forecast")

Time Series Forecasting

Autocorrelation and ACF plots

Autocorrelation is the correlation of a time series with lags of itself. It shows whether the previous states (lagged observations) of the time series have an influence on the current state. In the autocorrelation chart, if the autocorrelation crosses the dashed blue line, it means that lag is significantly correlated with the current series. It is commonly used to determine whether the time series is stationary: a stationary time series has autocorrelations that fall to zero fairly quickly, while for a non-stationary series they decay gradually.
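
To illustrate that last claim on data where the answer is known, here is a small sketch on simulated series (white noise is stationary, a random walk is not; this is separate from the beer data):

#Hypothetical illustration: ACF of a stationary vs a non-stationary simulated series
set.seed(1)
white_noise = rnorm(200)            #stationary: ACF should die out almost immediately
random_walk = cumsum(rnorm(200))    #non-stationary: ACF should decay very slowly
par(mfrow = c(1,2))
acf(white_noise, main = "Stationary series")
acf(random_walk, main = "Non-stationary series")
par(mfrow = c(1,1))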

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from beer (pos = 4):
## 
##     OzBeer
## The following object is masked from beer (pos = 5):
## 
##     OzBeer
## The following object is masked from beer (pos = 6):
## 
##     OzBeer
## The following object is masked from beer (pos = 7):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
acf=acf(OzBeer)

Partial Autocorrelation and PACF plot

Partial autocorrelation is the correlation of the time series with a lag of itself, with the linear dependence on all the lags between them removed. The partial autocorrelation function is a measure of the correlation between observations of a time series that are separated by k time units (y(t) and y(t-k)), after adjusting for the presence of all the terms of shorter lag (y(t-1), y(t-2), ..., y(t-k+1)).

pacf=pacf(OzBeer)

Interpreting PACF

Use the partial autocorrelation and autocorrelation functions together to identify ARIMA models. Examine the spikes at each lag to determine whether they are significant. A significant spike extends beyond the significance limits, which indicates that the correlation for that lag does not equal zero. Case 1 for the PACF: a large spike at lag 1 that decreases after a few lags indicates a moving average (MA) term in the data; use the autocorrelation function (ACF) to determine the order of the moving average term.

We cannot tell exactly which model is the best from the plots alone, so we need to create several candidate models based on p and q and then use the AIC value to find which model is best.

PART B : ARIMA Method

In an ARIMA(p,d,q) model, p and q determine the number of autoregressive and error (moving average) terms in the model we are trying to fit. p and q are estimated using the autocorrelation and partial autocorrelation functions/plots, and it is here that modelling becomes an “art”: choosing the values of p and q.
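
Since choosing p and q by eye is subjective, one optional cross-check (not part of the manual workflow below; beer_q and auto_fit are my own names) is to let auto.arima from the forecast package search the order space and then compare its AIC with the hand-picked fits:

#Hypothetical cross-check: automatic (p,d,q)(P,D,Q) search with forecast::auto.arima
beer_q = ts(read.csv("beer_assignment.csv",1)$OzBeer, frequency = 4)
auto_fit = auto.arima(beer_q, seasonal = TRUE)
auto_fit          #compare this model's AIC with the manually specified fits below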

Step 1: Visualize the time series

beer = read.csv("beer_assignment.csv",1)
attach(beer)
## The following object is masked from beer (pos = 3):
## 
##     OzBeer
## The following object is masked from beer (pos = 4):
## 
##     OzBeer
## The following object is masked from beer (pos = 5):
## 
##     OzBeer
## The following object is masked from beer (pos = 6):
## 
##     OzBeer
## The following object is masked from beer (pos = 7):
## 
##     OzBeer
## The following object is masked from beer (pos = 8):
## 
##     OzBeer
## The following object is masked from stationary:
## 
##     OzBeer
## The following object is masked from TSA1:
## 
##     OzBeer
beer_ts = ts(OzBeer,frequency = 4, start = c(2010))
plot(beer_ts, col="Blue", main="Time Series of Beer")

1. There is a trend component which grows the beer consumption year by year. 2. There appears to be a seasonal component with a cycle of 4 quarters (one year). 3. The variance in the data is constant over time.

Step 2: Stationarity

nsdiffs(beer_ts)
## [1] 1
# we have to do 1 differencing to remove seasonality

ndiffs(beer_ts)
## [1] 1
# we have to do 1 differencing to remove the trend

kpss.test(beer_ts)
## Warning in kpss.test(beer_ts): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  beer_ts
## KPSS Level = 3.0456, Truncation lag parameter = 1, p-value = 0.01
#The p-value is 0.01, which is less than 0.05, so reject the null hypothesis
#so the data is not stationary

kpss.test(diff(beer_ts))
## Warning in kpss.test(diff(beer_ts)): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(beer_ts)
## KPSS Level = 0.036853, Truncation lag parameter = 1, p-value = 0.1
#Now the p-value is 0.1, which is greater than 0.05, so we cannot reject the null hypothesis
#so the data is stationary now

Step 3: Determining Model Order

The next step is to find the right parameters for the ARIMA model. We already know that the ‘d’ component is 1, as we need one difference to make the series stationary. The remaining orders are chosen using the correlation plots.

acf((beer_ts))

#we have to use the once-differenced data
#Clearly the raw data is not stationary, BUT as discussed we have to use the differenced series when examining the correlation structure


acf(diff(beer_ts))

pacf(diff(beer_ts))

Step 4: Fit an ARIMA Model

fit = arima(diff(beer_ts), c(0, 1, 1),seasonal = list(order = c(0, 1, 1), period = 4))
fit
## 
## Call:
## arima(x = diff(beer_ts), order = c(0, 1, 1), seasonal = list(order = c(0, 1, 
##     1), period = 4))
## 
## Coefficients:
##           ma1     sma1
##       -1.0000  -0.7573
## s.e.   0.0509   0.0984
## 
## sigma^2 estimated as 214.2:  log likelihood = -275.75,  aic = 555.51
fit1 = arima((beer_ts), c(0, 1, 2),seasonal = list(order = c(0, 1, 1), period = 4))
fit1
## 
## Call:
## arima(x = (beer_ts), order = c(0, 1, 2), seasonal = list(order = c(0, 1, 1), 
##     period = 4))
## 
## Coefficients:
##           ma1     ma2     sma1
##       -1.0724  0.3285  -0.6293
## s.e.   0.1170  0.1135   0.1093
## 
## sigma^2 estimated as 104.8:  log likelihood = -252.58,  aic = 511.16
fit2 = arima((beer_ts), c(0, 1, 3),seasonal = list(order = c(0, 1, 3), period = 4))
fit2
## 
## Call:
## arima(x = (beer_ts), order = c(0, 1, 3), seasonal = list(order = c(0, 1, 3), 
##     period = 4))
## 
## Coefficients:
##           ma1     ma2     ma3     sma1     sma2    sma3
##       -1.0678  0.2795  0.0473  -0.7178  -0.0354  0.1847
## s.e.   0.1427  0.2285  0.1543   0.1698   0.1778  0.1636
## 
## sigma^2 estimated as 101.4:  log likelihood = -251.86,  aic = 515.72
fit3 = arima((beer_ts), c(1, 1, 2),seasonal = list(order = c(1, 1, 2), period = 4))
fit3
## 
## Call:
## arima(x = (beer_ts), order = c(1, 1, 2), seasonal = list(order = c(1, 1, 2), 
##     period = 4))
## 
## Coefficients:
##          ar1      ma1     ma2     sar1    sma1     sma2
##       0.0208  -1.0743  0.3167  -0.8662  0.3591  -0.6409
## s.e.  0.3970   0.3600  0.2875   0.1210  0.1551   0.1325
## 
## sigma^2 estimated as 98.18:  log likelihood = -251.86,  aic = 515.72
fit4 = arima((beer_ts), c(2, 1, 2),seasonal = list(order = c(2, 1, 2), period = 4))
fit4
## 
## Call:
## arima(x = (beer_ts), order = c(2, 1, 2), seasonal = list(order = c(2, 1, 2), 
##     period = 4))
## 
## Coefficients:
##           ar1      ar2      ma1     ma2     sar1     sar2    sma1     sma2
##       -0.4562  -0.2416  -0.6039  0.0234  -0.9597  -0.1059  0.4170  -0.5829
## s.e.   0.7524   0.2474   0.7519  0.5947   0.2322   0.2289  0.1995   0.1749
## 
## sigma^2 estimated as 95.61:  log likelihood = -251.31,  aic = 518.61
#From the above models, based on the AIC criterion we can go ahead with fit1, which has the lowest AIC of 511.16
fit1
## 
## Call:
## arima(x = (beer_ts), order = c(0, 1, 2), seasonal = list(order = c(0, 1, 1), 
##     period = 4))
## 
## Coefficients:
##           ma1     ma2     sma1
##       -1.0724  0.3285  -0.6293
## s.e.   0.1170  0.1135   0.1093
## 
## sigma^2 estimated as 104.8:  log likelihood = -252.58,  aic = 511.16
fit1$aic
## [1] 511.1552

Step 5: Evaluating Residuals

The Ljung-Box test is carried out on the residuals to check that what remains after fitting the model is just uncorrelated noise. Null hypothesis: the data is independently distributed. Alternative hypothesis: the data shows serial correlation.

Box.test(residuals(fit1),lag=4,type="Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(fit1)
## X-squared = 1.135, df = 4, p-value = 0.8887

The p-value is > 0.05, so we fail to reject the null hypothesis that the residuals are independently distributed.

Step 6: Forecasting

pred <- predict(fit1, n.ahead = 8)
#ylab (not ylabel) is the argument that sets the y-axis label for ts.plot
ts.plot(beer_ts,pred$pred, lty = c(1,3),ylab ="beer",main="Forecasted Series")