SARIMA simulation

x=NULL
z=NULL
n=10000

z=rnorm(n)
x[1:13]=1

for(i in 14:n){
  x[i]<-z[i]+0.7*z[i-1]+0.6*z[i-12]+0.42*z[i-13]
}

par(mfrow=c(2,1))
plot.ts(x[12:120], main='The first 10 months of simulation SARIMA(0,0,1,0,0)_12', ylab='') 

acf(x, main='SARIMA(0,0,1,0,0,1)_12 Simulation')

SARIMA code for J&J data set

library(astsa)
plot(jj, main="Johnson & Johnson Quarterly Earnings Per Share", ylab="EPS")

Perform SARIMA Analysis

d=1
DD=1

per=4

for(p in 1:2){
  for(q in 1:2){
    for(i in 1:2){
      for(j in 1:2){
        if(p+d+q+i+DD+j<=10){
          model<-arima(x=log(jj), order = c((p-1),d,(q-1)), seasonal = list(order=c((i-1),DD,(j-1)), period=per))
          pval<-Box.test(model$residuals, lag=log(length(model$residuals)))
          sse<-sum(model$residuals^2)
          cat(p-1,d,q-1,i-1,DD,j-1,per, 'AIC=', model$aic, ' SSE=',sse,' p-VALUE=', pval$p.value,'\n')
        }
      }
    }
  }
}
## 0 1 0 0 1 0 4 AIC= -124.0685  SSE= 0.9377871  p-VALUE= 0.0002610795 
## 0 1 0 0 1 1 4 AIC= -126.3493  SSE= 0.8856994  p-VALUE= 0.0001606542 
## 0 1 0 1 1 0 4 AIC= -125.9198  SSE= 0.8908544  p-VALUE= 0.0001978052 
## 0 1 0 1 1 1 4 AIC= -124.3648  SSE= 0.8854554  p-VALUE= 0.000157403 
## 0 1 1 0 1 0 4 AIC= -145.5139  SSE= 0.6891988  p-VALUE= 0.03543717 
## 0 1 1 0 1 1 4 AIC= -150.7528  SSE= 0.6265214  p-VALUE= 0.6089542 
## 0 1 1 1 1 0 4 AIC= -150.9134  SSE= 0.6251634  p-VALUE= 0.707918 
## 0 1 1 1 1 1 4 AIC= -149.1317  SSE= 0.6232876  p-VALUE= 0.6780876 
## 1 1 0 0 1 0 4 AIC= -139.8248  SSE= 0.7467494  p-VALUE= 0.03503386 
## 1 1 0 0 1 1 4 AIC= -146.0191  SSE= 0.6692691  p-VALUE= 0.5400205 
## 1 1 0 1 1 0 4 AIC= -146.0319  SSE= 0.6689661  p-VALUE= 0.5612964 
## 1 1 0 1 1 1 4 AIC= -144.3766  SSE= 0.6658382  p-VALUE= 0.5459445 
## 1 1 1 0 1 0 4 AIC= -145.8284  SSE= 0.667109  p-VALUE= 0.2200484 
## 1 1 1 0 1 1 4 AIC= -148.7706  SSE= 0.6263677  p-VALUE= 0.594822 
## 1 1 1 1 1 0 4 AIC= -148.9175  SSE= 0.6251104  p-VALUE= 0.7195469 
## 1 1 1 1 1 1 4 AIC= -144.4483  SSE= 0.6097742  p-VALUE= 0.3002702
sarima(log(jj), 0,1,1,1,1,0,4)
## initial  value -2.237259 
## iter   2 value -2.429075
## iter   3 value -2.446738
## iter   4 value -2.455821
## iter   5 value -2.459761
## iter   6 value -2.462511
## iter   7 value -2.462602
## iter   8 value -2.462749
## iter   9 value -2.462749
## iter   9 value -2.462749
## iter   9 value -2.462749
## final  value -2.462749 
## converged
## initial  value -2.411490 
## iter   2 value -2.412022
## iter   3 value -2.412060
## iter   4 value -2.412062
## iter   4 value -2.412062
## iter   4 value -2.412062
## final  value -2.412062 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ma1     sar1
##       -0.6796  -0.3220
## s.e.   0.0969   0.1124
## 
## sigma^2 estimated as 0.007913:  log likelihood = 78.46,  aic = -150.91
## 
## $degrees_of_freedom
## [1] 77
## 
## $ttable
##      Estimate     SE t.value p.value
## ma1   -0.6796 0.0969 -7.0104  0.0000
## sar1  -0.3220 0.1124 -2.8641  0.0054
## 
## $AIC
## [1] -1.910297
## 
## $AICc
## [1] -1.908298
## 
## $BIC
## [1] -1.820318

Monthly Milk Production Dataset

milk<-read.csv('monthly-milk-production-pounds-p.csv')
Milk<-milk$Pounds
library(astsa)
sarima(Milk,0,1,0,0,1,1,12)
## initial  value 1.960071 
## iter   2 value 1.820277
## iter   3 value 1.808696
## iter   4 value 1.803385
## iter   5 value 1.802687
## iter   6 value 1.800218
## iter   7 value 1.800130
## iter   8 value 1.800128
## iter   9 value 1.800127
## iter   9 value 1.800127
## iter   9 value 1.800127
## final  value 1.800127 
## converged
## initial  value 1.797249 
## iter   2 value 1.795522
## iter   3 value 1.795498
## iter   4 value 1.795498
## iter   4 value 1.795498
## final  value 1.795498 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##          sma1
##       -0.6750
## s.e.   0.0752
## 
## sigma^2 estimated as 34.47:  log likelihood = -459.66,  aic = 923.33
## 
## $degrees_of_freedom
## [1] 142
## 
## $ttable
##      Estimate     SE t.value p.value
## sma1   -0.675 0.0752 -8.9785       0
## 
## $AIC
## [1] 6.456845
## 
## $AICc
## [1] 6.457043
## 
## $BIC
## [1] 6.498283
library(astsa)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
## 
##     gas
d=NULL
DD=NULL
d=1
DD=1

per=12
for(p in 1:1){
  for(q in 1:1){
    for(i in 1:3){
      for(j in 1:4){
        if(p+d+q+i+DD+j<=10){
          model<-arima(x=Milk, order = c((p-1),d,(q-1)), seasonal = list(order=c((i-1),DD,(j-1)), period=per))
          pval<-Box.test(model$residuals, lag=log(length(model$residuals)))
          sse<-sum(model$residuals^2)
          cat(p-1,d,q-1,i-1,DD,j-1,per, 'AIC=', model$aic, ' SSE=',sse,' p-VALUE=', pval$p.value,'\n')
        }
      }
    }
  }
}
## 0 1 0 0 1 0 12 AIC= 968.3966  SSE= 7213.013  p-VALUE= 0.4393367 
## 0 1 0 0 1 1 12 AIC= 923.3288  SSE= 4933.349  p-VALUE= 0.6493728 
## 0 1 0 0 1 2 12 AIC= 925.3072  SSE= 4931.398  p-VALUE= 0.6529998 
## 0 1 0 0 1 3 12 AIC= 927.2329  SSE= 4925.911  p-VALUE= 0.6640233 
## 0 1 0 1 1 0 12 AIC= 938.6402  SSE= 5668.197  p-VALUE= 0.493531 
## 0 1 0 1 1 1 12 AIC= 925.3063  SSE= 4931.428  p-VALUE= 0.6531856 
## 0 1 0 1 1 2 12 AIC= 927.3036  SSE= 4931.135  p-VALUE= 0.6537708 
## 0 1 0 1 1 3 12 AIC= 929.2146  SSE= 4924.747  p-VALUE= 0.6627108 
## 0 1 0 2 1 0 12 AIC= 932.6438  SSE= 5308.012  p-VALUE= 0.6004804 
## 0 1 0 2 1 1 12 AIC= 927.2797  SSE= 4929.733  p-VALUE= 0.657349 
## 0 1 0 2 1 2 12 AIC= 926.8053  SSE= 4618.499  p-VALUE= 0.6826743
model<- arima(x=Milk, order = c(0,1,0), seasonal = list(order=c(0,1,1), period=12))
plot(forecast(model))

forecast(model)
##     Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## 157       823.3978 815.8740  830.9216 811.8911  834.9045
## 158       854.9196 844.2793  865.5598 838.6467  871.1925
## 159       882.1923 869.1607  895.2239 862.2622  902.1224
## 160       925.2390 910.1914  940.2866 902.2257  948.2523
## 161       958.4461 941.6225  975.2698 932.7165  984.1757
## 162       962.2105 943.7811  980.6399 934.0252  990.3959
## 163       890.9973 871.0912  910.9033 860.5536  921.4409
## 164       851.3336 830.0531  872.6140 818.7879  883.8792
## 165       829.7513 807.1800  852.3226 795.2314  864.2711
## 166       806.7802 782.9880  830.5725 770.3931  843.1673
## 167       795.9513 770.9978  820.9048 757.7882  834.1144
## 168       810.5435 784.4804  836.6066 770.6834  850.4036
## 169       835.7413 807.8366  863.6460 793.0648  878.4178
## 170       867.2631 837.6311  896.8950 821.9449  912.5813
## 171       894.5358 863.2718  925.7998 846.7217  942.3499
## 172       937.5825 904.7676  970.3974 887.3964  987.7686
## 173       970.7896 936.4939 1005.0854 918.3388 1023.2405
## 174       974.5540 938.8387 1010.2693 919.9322 1029.1759
## 175       903.3407 866.2602  940.4213 846.6310  960.0505
## 176       863.6771 825.2798  902.0743 804.9536  922.4006
## 177       842.0948 802.4245  881.7650 781.4243  902.7652
## 178       819.1237 778.2200  860.0274 756.5669  881.6805
## 179       808.2948 766.1938  850.3958 743.9069  872.6827
## 180       822.8870 779.6218  866.1522 756.7186  889.0554

Souvenir Shop Sales

SUV<-read.csv('monthly-sales-for-a-souvenir-sho.csv')
suv<-ts(SUV$Sales)
library(astsa)
library(forecast)
par(mfrow=c(2,2))

plot(suv, main='Monthly sales for a souvenir shop', ylab='', col='blue', lwd=3)
plot(log(suv), main='Log-transorm of sales', ylab='', col='red', lwd=3)
plot(diff(log(suv)), main='Differenced Log-transorm of sales', ylab='', col='brown', lwd=3)
plot(diff(diff(log(suv)),12), main='Log-transorm without trend and seasonaliy', ylab='', col='green', lwd=3)

data<-diff(diff((log(suv)),12))
acf2(data, 50)

##       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8] [,9] [,10] [,11] [,12]
## ACF  -0.46  0.19 -0.17 -0.06  0.01  0.00 -0.07  0.07 0.09  0.02  0.00  -0.2
## PACF -0.46 -0.02 -0.11 -0.23 -0.13 -0.07 -0.20 -0.12 0.11  0.11  0.04  -0.2
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF   0.08 -0.05  0.07 -0.03  0.05 -0.02 -0.05  0.15 -0.24  0.18 -0.01 -0.07
## PACF -0.07 -0.01 -0.01 -0.05  0.02 -0.03 -0.19  0.13 -0.06  0.01  0.15 -0.09
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.13 -0.21  0.15 -0.11  0.12 -0.01 -0.05 -0.04  0.14 -0.25  0.15 -0.10
## PACF  0.03 -0.16  0.04 -0.05  0.06  0.10 -0.12 -0.12  0.08 -0.15 -0.08 -0.05
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF   0.09  0.05  0.01 -0.05 -0.04 -0.07  0.10 -0.01  0.03  0.02  0.03 -0.03
## PACF  0.00 -0.07  0.00 -0.07  0.01 -0.21  0.03  0.13 -0.05 -0.01  0.02  0.04
##      [,49] [,50]
## ACF   0.00 -0.05
## PACF -0.02 -0.02
d=1
DD=1
per=12
for(p in 1:2){
  for(q in 1:2){
    for(i in 1:2){
      for(j in 1:4){
        if(p+d+q+i+DD+j<=10){
          model<-arima(x=log(suv), order = c((p-1),d,(q-1)), seasonal = list(order=c((i-1),DD,(j-1)), period=per))
          pval<-Box.test(model$residuals, lag=log(length(model$residuals)))
          sse<-sum(model$residuals^2)
          cat(p-1,d,q-1,i-1,DD,j-1,per, 'AIC=', model$aic, ' SSE=',sse,' p-VALUE=', pval$p.value,'\n')
        }
      }
    }
  }
}
## 0 1 0 0 1 0 12 AIC= -11.60664  SSE= 3.432906  p-VALUE= 0.0001365566 
## 0 1 0 0 1 1 12 AIC= -16.09179  SSE= 2.97756  p-VALUE= 3.149952e-05 
## 0 1 0 0 1 2 12 AIC= -17.58234  SSE= 2.301963  p-VALUE= 0.0002456591 
## 0 1 0 0 1 3 12 AIC= -16.41016  SSE= 2.35266  p-VALUE= 0.0003392283 
## 0 1 0 1 1 0 12 AIC= -13.43083  SSE= 3.214065  p-VALUE= 4.083839e-05 
## 0 1 0 1 1 1 12 AIC= -17.76362  SSE= 2.399754  p-VALUE= 0.0001916581 
## 0 1 0 1 1 2 12 AIC= -15.99095  SSE= 2.349899  p-VALUE= 0.0002477783 
## 0 1 0 1 1 3 12 AIC= -14.74777  SSE= 2.302026  p-VALUE= 0.0004504596 
## 0 1 1 0 1 0 12 AIC= -27.78538  SSE= 2.643277  p-VALUE= 0.1742478 
## 0 1 1 0 1 1 12 AIC= -34.54538  SSE= 2.233424  p-VALUE= 0.2730783 
## 0 1 1 0 1 2 12 AIC= -33.6145  SSE= 2.109473  p-VALUE= 0.2830597 
## 0 1 1 0 1 3 12 AIC= -32.19273  SSE= 1.87789  p-VALUE= 0.270042 
## 0 1 1 1 1 0 12 AIC= -32.33192  SSE= 2.360507  p-VALUE= 0.2584529 
## 0 1 1 1 1 1 12 AIC= -34.0881  SSE= 1.842013  p-VALUE= 0.2843225 
## 0 1 1 1 1 2 12 AIC= -32.1017  SSE= 1.856342  p-VALUE= 0.28516 
## 1 1 0 0 1 0 12 AIC= -27.07825  SSE= 2.6747  p-VALUE= 0.2297871 
## 1 1 0 0 1 1 12 AIC= -34.98918  SSE= 2.209442  p-VALUE= 0.4633806 
## 1 1 0 0 1 2 12 AIC= -33.38623  SSE= 2.159411  p-VALUE= 0.4515394 
## 1 1 0 0 1 3 12 AIC= -31.54519  SSE= 2.121635  p-VALUE= 0.4390829 
## 1 1 0 1 1 0 12 AIC= -32.64858  SSE= 2.340077  p-VALUE= 0.4022223 
## 1 1 0 1 1 1 12 AIC= -33.48894  SSE= 2.125757  p-VALUE= 0.4442648 
## 1 1 0 1 1 2 12 AIC= -31.52137  SSE= 2.093128  p-VALUE= 0.4463096 
## 1 1 1 0 1 0 12 AIC= -26.17089  SSE= 2.624281  p-VALUE= 0.2507443 
## 1 1 1 0 1 1 12 AIC= -33.30647  SSE= 2.201798  p-VALUE= 0.4110139 
## 1 1 1 0 1 2 12 AIC= -31.68924  SSE= 2.151774  p-VALUE= 0.3820815 
## 1 1 1 1 1 0 12 AIC= -31.10127  SSE= 2.323818  p-VALUE= 0.3492746 
## 1 1 1 1 1 1 12 AIC= -32.69914  SSE= 1.823491  p-VALUE= 0.3092333
model<- arima(x=log(suv), order = c(1,1,0), seasonal = list(order=c(0,1,1), period=12))

plot(forecast(model))

forecast(model)
##     Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
##  85       9.600019  9.373968  9.826071  9.254303  9.945736
##  86       9.786505  9.533944 10.039066  9.400246 10.172764
##  87      10.329605 10.025423 10.633786  9.864399 10.794810
##  88      10.081973  9.746705 10.417240  9.569225 10.594720
##  89      10.008096  9.638604 10.377587  9.443007 10.573184
##  90      10.181170  9.783094 10.579245  9.572365 10.789974
##  91      10.439372 10.013362 10.865383  9.787845 11.090900
##  92      10.534857 10.083237 10.986477  9.844164 11.225551
##  93      10.613026 10.136886 11.089165  9.884833 11.341218
##  94      10.664526 10.165207 11.163846  9.900883 11.428170
##  95      11.096784 10.575248 11.618321 10.299163 11.894406
##  96      11.877167 11.334355 12.419979 11.047007 12.707326
##  97       9.932756  9.330373 10.535139  9.011491 10.854021
##  98      10.112194  9.475681 10.748707  9.138731 11.085657
##  99      10.658829  9.980844 11.336814  9.621940 11.695718
## 100      10.409423  9.696788 11.122058  9.319542 11.499304
## 101      10.336437  9.588666 11.084207  9.192821 11.480052
## 102      10.509064  9.728749 11.289378  9.315676 11.702452
## 103      10.767491  9.955449 11.579532  9.525580 12.009401
## 104      10.862863 10.020524 11.705202  9.574617 12.151109
## 105      10.941088 10.069390 11.812786  9.607940 12.274235
## 106      10.992560 10.092516 11.892605  9.616061 12.369060
## 107      11.424833 10.497280 12.352385 10.006263 12.843402
## 108      12.205208 11.250954 13.159461 10.745803 13.664613
a<-sarima.for(log(suv),12,1,1,0,0,1,1,12)

plot.ts(c(suv,exp(a$pred)), main='Monthly sales + Forecast', ylab='', col='blue', lwd=3)

USAccDeaths Dataset

plot(USAccDeaths)

We first get rid of the seasonal trend by differencing the values at the same month of each year. Plot the seasonally differenced time series in the code block below.

plot(diff(USAccDeaths,12),ylab="Seasonally Differenced Data")

There is still a clear upward trend.

We de-trend the seasonally differenced time series by taking non-seasonal differencing, diff(), and call the obtained time series ‘acData’. Obtain ACF and PACF of ‘acData’ in the code block below.

par(mfrow=c(2,1))
acData <- diff(USAccDeaths,12)
# obtain acf and pacf below
acf(acData)
pacf(acData)

We observe that:

We try few different models, and choose the model with smallest AIC: SARIMA\((0,1,1,0,1,1)_{12}\). If \(X_t\) = USAccDeaths, which of the following is/are the fitted model?

Forecasting using Simple Exponential Smoothing

rm(list=ls(all=TRUE))
rain.data <- scan("http://robjhyndman.com/tsdldata/hurst/precip1.dat",skip=1)
rain.ts <- ts(rain.data, start=c(1813))
#par(mfrow=c(2,1))
hist(rain.data, main="Annual London Rainfall 1813-1912",
xlab="rainfall in inches")

qqnorm(rain.data,main="Normal Plot of London Rainfall")
qqline(rain.data)

#par( mfrow=c(2,1) )
plot.ts(rain.ts, main="Annual London Rainfall 1813-1912",
xlab="year", ylab="rainfall in inches")

acf(rain.ts, main="ACF: London Rainfall")

library(forecast)
auto.arima(rain.ts)
## Series: rain.ts 
## ARIMA(0,0,0) with non-zero mean 
## 
## Coefficients:
##          mean
##       24.8239
## s.e.   0.4193
## 
## sigma^2 estimated as 17.76:  log likelihood=-285.25
## AIC=574.49   AICc=574.61   BIC=579.7

Exponential Smoothing

Form a new forecast by

  • weighting the previous forecast, \(x_n^{n-1}\)
  • updating with the fresh data point, \(x_n\)
  • \(\hat{x}_{n+1} = \alpha x_n + (1-\alpha)\hat{x}_n\)
#DIY

alpha <- 0.2
forecast.values <- NULL
n <- length(rain.data)

# Naive first forecast
forecast.values[1] <- rain.data[1]
# Loop to create all forecast values

for(i in 1:n){
  forecast.values[i+1] <- alpha*rain.data[i] + (1-alpha)*forecast.values[i]
  
paste("forecast for time",n+1,"=",forecast.values[n+1])
}
forecast.values[101]  # prediction for year 1913
## [1] 25.30941
# Now, using Holt-Winters algorithm

HoltWinters(rain.ts,beta=FALSE,gamma=FALSE)
## Holt-Winters exponential smoothing without trend and without seasonal component.
## 
## Call:
## HoltWinters(x = rain.ts, beta = FALSE, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.02412151
##  beta : FALSE
##  gamma: FALSE
## 
## Coefficients:
##       [,1]
## a 24.67819
rm(list=ls(all=TRUE))

devtools::install_github("FinYang/tsdl")
## Skipping install of 'tsdl' from a github remote, the SHA1 (56e09154) has not changed since last install.
##   Use `force = TRUE` to force installation
library(tsdl)
# https://pkg.yangzhuoranyang.com/tsdl/articles/tsdl.html

money.data <- subset(tsdl, 
  description = "Volume of money, ABS definition m1. Feb 1960 – Dec 1994")
money.data.ts <- money.data[[1]]  # List element
#money.data.ts[162] = 22458  # NA value for July 1973

# Note that there is a NA value for July 1973, which will cause an error.  We need to insert the average value based on June and August 1973, which is 22,458.

money.data.ts[162] = ( money.data.ts[161] + money.data.ts[163] ) / 2
# money.data.ts = ts(money.data[,2],start=c(1960,2) , frequency=12)
par(mfrow=c(3,1))
plot(money.data.ts, main="Time Plot of Volume of Money")
acf(money.data.ts, main="ACF of Volume of Money")
acf(money.data.ts, type="partial", main="PACF of Volume of Money")

m=HoltWinters(money.data.ts, gamma = FALSE)
m
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = money.data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.9387058
##  beta : 0.06190008
##  gamma: FALSE
## 
## Coefficients:
##       [,1]
## a 260937.7
## b   1723.3
plot(m,main="Holt-Winters Fitting of Money Volume with Optimal Parameters")