Student detail

Executive Statement

Load required packages

library(TSA)
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(fUnitRoots)
## Loading required package: timeDate
## 
## Attaching package: 'timeDate'
## The following objects are masked from 'package:TSA':
## 
##     kurtosis, skewness
## Loading required package: timeSeries
## Loading required package: fBasics
library(tseries)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
## 
##     time<-
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(forecast)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.Arima       TSA     
##   fitted.fracdiff    fracdiff
##   plot.Arima         TSA     
##   residuals.fracdiff fracdiff

Load required data and convert it into time series

egg<- read.csv("eggs.csv",header = T)
class(egg)
## [1] "data.frame"
egg.ts <- ts(as.vector(egg[2]), start=1981)
class(egg.ts)
## [1] "ts"

Plot time series data and check for stationarity

plot(egg.ts , type='o', main='Time series of yearly egg depositions of Lake Huron Bloasters', ylab='egg depositions(in millions)')

par(mfrow=c(1,2)) 
acf(egg.ts)
pacf(egg.ts)

par(mfrow=c(1,1)) 
adf.test(egg.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  egg.ts
## Dickey-Fuller = -2.0669, Lag order = 2, p-value = 0.5469
## alternative hypothesis: stationary

Fit quadratic model

t = time(egg.ts)
t2 = t^2
model1 = lm(egg.ts~ t + t2) 
summary(model1)
## 
## Call:
## lm(formula = egg.ts ~ t + t2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50896 -0.25523 -0.02701  0.16615  0.96322 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -4.647e+04  2.141e+04  -2.170   0.0491 *
## t            4.665e+01  2.153e+01   2.166   0.0494 *
## t2          -1.171e-02  5.415e-03  -2.163   0.0498 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4092 on 13 degrees of freedom
## Multiple R-squared:  0.5932, Adjusted R-squared:  0.5306 
## F-statistic: 9.479 on 2 and 13 DF,  p-value: 0.00289
plot(ts(fitted(model1)),ylab='Yearly egg depositions', main = "Fitted quadratic curve to Yearly egg depositions", ylim=c(0,2.2))
lines(as.vector(egg.ts),type="o")

res.model1 = rstudent(model1)
plot(y = res.model1, x = as.vector(time(egg.ts)),xlab = 'Time', ylab='Standardized Residuals',type='o', main='residual plot for model 1')
abline(h=0)

qqnorm(res.model1)
qqline(res.model1, col = 2)

shapiro.test(res.model1)
## 
##  Shapiro-Wilk normality test
## 
## data:  res.model1
## W = 0.87948, p-value = 0.03809
acf(res.model1)

Transform time series

egg.ts.t = BoxCox.ar(egg.ts, method='yule-walker')
## Warning in arima0(x, order = c(i, 0L, 0L), include.mean = demean): possible
## convergence problem: optim gave code = 1

## Warning in arima0(x, order = c(i, 0L, 0L), include.mean = demean): possible
## convergence problem: optim gave code = 1

egg.ts.t$ci
## [1] 0.1 0.8
lambda = 0.5 
BC.egg = (egg.ts^lambda-1)/lambda
qqnorm(BC.egg)
qqline(BC.egg, col = 2)

shapiro.test(BC.egg)
## 
##  Shapiro-Wilk normality test
## 
## data:  BC.egg
## W = 0.96562, p-value = 0.7636

First differencing of time series

egg.diff = diff(BC.egg)
par(mfrow=c(1,1))
plot(egg.diff,type='o',ylab='Yearly egg depositions')

adf.test(egg.diff)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  egg.diff
## Dickey-Fuller = -3.6096, Lag order = 2, p-value = 0.04931
## alternative hypothesis: stationary
par(mfrow=c(1,2))
acf(egg.diff)
pacf(egg.diff)

par(mfrow=c(1,1))

Second differencing of time series

egg.diff2 = diff(BC.egg, differences = 2)
par(mfrow=c(1,1))
plot(egg.diff2,type='o',ylab="early egg depositions")

adf.test(egg.diff2)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  egg.diff2
## Dickey-Fuller = -3.1108, Lag order = 2, p-value = 0.1492
## alternative hypothesis: stationary
par(mfrow=c(1,2))
acf(egg.diff2)
pacf(egg.diff2)

par(mfrow=c(1,1))

Use EACF and BIC table to find other possible models

eacf(egg.diff2, ar.max = 3, ma.max = 3)
## AR/MA
##   0 1 2 3
## 0 o o o o
## 1 o o o o
## 2 o o o o
## 3 o o o o
res = armasubsets(y=egg.diff2,nar=3,nma=3,y.name='test',ar.method='ols')
## Warning in ar.ols(x, aic = aic, order.max = order.max, na.action =
## na.action, : model order: 7 singularities in the computation of the
## projection matrix results are only valid up to model order 6
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax,
## force.in = force.in, : 2 linear dependencies found
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax,
## force.in = force.in, : nvmax reduced to 4
plot(res)
## Warning in log(vr): NaNs produced
## Warning in log(vr): NaNs produced

## Warning in log(vr): NaNs produced

Candidate models summary

Fit ARIMA(1,2,0)

model.120=arima(egg.ts, order = c(1,2,0), method='CSS')
coeftest(model.120)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.45944    0.23810 -1.9296  0.05365 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.120=arima(egg.ts, order = c(1,2,0), method='ML')
coeftest(model.120)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.42966    0.22743 -1.8892  0.05886 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit ARIMA(1,2,1)

model.121=arima(egg.ts, order = c(1,2,1), method='CSS')
coeftest(model.121)
## 
## z test of coefficients:
## 
##      Estimate Std. Error  z value Pr(>|z|)    
## ar1  0.073817   0.284315   0.2596   0.7951    
## ma1 -1.132556   0.074796 -15.1419   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.121=arima(egg.ts, order = c(1,2,1), method='ML')
coeftest(model.121)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1  0.071763   0.269250  0.2665    0.7898    
## ma1 -0.999998   0.236871 -4.2217 2.425e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit ARIMA(0,2,1)

model.021=arima(egg.ts, order = c(0,2,1), method='CSS')
coeftest(model.021)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ma1 -1.066739   0.071847 -14.847 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(rstandard(model.021), ylab='Standardized residuals', type='o', main='Time series plot of standardized residuals
     for the second difference of the Box-Cox transformed egg deposition series')
abline(h=0)

e1=residuals(model.021)
qqnorm(e1)
qqline(e1)

shapiro.test(e1)
## 
##  Shapiro-Wilk normality test
## 
## data:  e1
## W = 0.92621, p-value = 0.2121
par(mfrow=c(1,2))
acf(residuals(model.021))
pacf(residuals(model.021))

par(mfrow=c(1,1))
Box.test(residuals(model.021), lag=6, type="Ljung-Box", fitdf=0)
## 
##  Box-Ljung test
## 
## data:  residuals(model.021)
## X-squared = 7.7309, df = 6, p-value = 0.2585
tsdiag(model.021, gof=15, omit.initial=F)

Fit model ARIMA(3,2,2)

model.322=arima(egg.ts, order = c(3,2,2), method='CSS')
coeftest(model.322)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.139922   0.348205 -0.4018  0.68780  
## ar2 -0.198602   0.258520 -0.7682  0.44235  
## ar3 -0.544728   0.277057 -1.9661  0.04928 *
## ma1 -0.683391   0.360419 -1.8961  0.05795 .
## ma2  0.045948   0.329455  0.1395  0.88908  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.322=arima(egg.ts, order = c(3,2,2), method='ML')
coeftest(model.322)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1  0.132356   0.472342  0.2802  0.77932  
## ar2 -0.099038   0.323371 -0.3063  0.75940  
## ar3 -0.384399   0.314000 -1.2242  0.22088  
## ma1 -0.996970   0.589396 -1.6915  0.09074 .
## ma2  0.199121   0.472870  0.4211  0.67369  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit model ARIMA(3,2,0)

model.320=arima(egg.ts, order = c(3,2,0), method='CSS')
coeftest(model.320)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.53949    0.23534 -2.2924  0.02188 *
## ar2 -0.33499    0.27022 -1.2397  0.21508  
## ar3 -0.45802    0.26462 -1.7309  0.08348 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.323=arima(egg.ts, order = c(3,2,0), method='ML')
coeftest(model.320)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.53949    0.23534 -2.2924  0.02188 *
## ar2 -0.33499    0.27022 -1.2397  0.21508  
## ar3 -0.45802    0.26462 -1.7309  0.08348 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model selection

Prediction using selected model

predict(model.021, n.ahead=5, newxreg = NULL, se.fit=TRUE)
## Warning in predict.Arima(model.021, n.ahead = 5, newxreg = NULL, se.fit =
## TRUE): MA part of model is not invertible
## $pred
## Time Series:
## Start = 1997 
## End = 2001 
## Frequency = 1 
## [1] 1.080342 1.136584 1.192826 1.249068 1.305310
## 
## $se
## Time Series:
## Start = 1997 
## End = 2001 
## Frequency = 1 
## [1] 0.4567535 0.6722845 0.8553529 1.0243099 1.1858809
fit=Arima(egg.ts, c(0,2,1), lambda = 0.5)
plot(forecast(fit, h=5))

Conclusion