library(readr)
library(TSA)
library(tseries)
library(fUnitRoots)
# Reading data
delta_ozone <- read_csv("data1.csv", col_names = FALSE)
# Renaming column
names(delta_ozone) <- c('delta')
# Converting to time series
ts_delta_ozone <- ts(delta_ozone, start = 1927, end = 2016)
# Plotting the time series
plot(ts_delta_ozone, type = 'o',
main ="Time series plot of change in thickness of Ozone layer",
ylab = "Change in thickness",
xlab = "Year",
col = c('navy'))
Figure 1: Time series plot of change in thickness of ozone layer.
\[\mu = \beta_{0} + \beta_{1}t\]
# Testing a simple determininstic linear model
linear_model = lm(ts_delta_ozone~time(ts_delta_ozone))
summary(linear_model)
Call:
lm(formula = ts_delta_ozone ~ time(ts_delta_ozone))
Residuals:
Min 1Q Median 3Q Max
-4.7165 -1.6687 0.0275 1.4726 4.7940
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 213.720155 16.257158 13.15 <2e-16 ***
time(ts_delta_ozone) -0.110029 0.008245 -13.34 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.032 on 88 degrees of freedom
Multiple R-squared: 0.6693, Adjusted R-squared: 0.6655
F-statistic: 178.1 on 1 and 88 DF, p-value: < 2.2e-16
#plotting the fitted linear model
plot(ts_delta_ozone, type = 'o',
main ="Simple Linear model for change in thickness of Ozone Layer",
ylab = "change in thickness of Ozone Layer",
xlab = "Year",
col = c("navy"))
abline(linear_model, col = 'red')
Figure 2: Simple Linear model for change in thickness of Ozone Layer
Figure 3: Residual of linear regression model for change in ozone thickness
## Q-Q plot of the residuals for checking normality
qqnorm(y, main = "Q-Q Plot of residuals to check normality")
qqline(y, col = 2, lwd = 2, lty = 3)
Figure 4: Q-Q Plot of residuals to check normality
#Shapiro test for normality
shapiro.test(y)
Shapiro-Wilk normality test
data: y
W = 0.98733, p-value = 0.5372
Plotting Auto-correlation of the residuals of linear model
acf(y, main = "Autocorrelation plot of the residuals of linear model")
Figure 5: Autocorrelation plot of the residuals of linear model
\[\mu_{t} = \beta_{0} + \beta_{1}t + \beta_{2}t^2\]
# Quadratic model
t = time(ts_delta_ozone)
t2 = t^2
quad_model = lm(ts_delta_ozone ~ t + t2) # creating the quadratic model
summary(quad_model) # looking at the significance of quadratic model
Call:
lm(formula = ts_delta_ozone ~ t + t2)
Residuals:
Min 1Q Median 3Q Max
-5.1062 -1.2846 -0.0055 1.3379 4.2325
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.733e+03 1.232e+03 -4.654 1.16e-05 ***
t 5.924e+00 1.250e+00 4.739 8.30e-06 ***
t2 -1.530e-03 3.170e-04 -4.827 5.87e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.815 on 87 degrees of freedom
Multiple R-squared: 0.7391, Adjusted R-squared: 0.7331
F-statistic: 123.3 on 2 and 87 DF, p-value: < 2.2e-16
#plotting the quadratic curve
plot(
ts(fitted(quad_model), start = 1927),
ylab = 'Change in thickness of Ozone',
main = "Quadratic model for change in thickness of Ozone Layer",
ylim = c(-12, 3),
col = 'red'
)
#overlay the ozone layer data
lines(ts_delta_ozone,type="o")
Figure 6: Quadratic model for change in thickness of Ozone Layer
#residual analysis of quadratic model
res.quad_model = rstudent(quad_model)
plot(y = res.quad_model, x = as.vector(time(ts_delta_ozone)),
xlab = 'Year', ylab='Standardized Residuals',type='p',
main = "Residual of quadratic regression model for change in ozone thickness")
abline(h=mean(y), col='red')
Figure 7: Residual of quadratic regression model for change in ozone thickness
# Q-Q plot to visualize normality of residuals
y2 = res.quad_model
qqnorm(y2, main = "Q-Q Plot of residuals of quadratic model")
qqline(y2, col = 2, lwd = 2, lty = 3)
Figure 8: Q-Q Plot of residuals of quadratic model
# Shapiro test for normality of residuals of Quadratic Model
shapiro.test(y2)
Shapiro-Wilk normality test
data: y2
W = 0.98889, p-value = 0.6493
Plotting Auto-correlation of the residuals of quadratic model
acf(y2, main = "Autocorrelation plot of residuals of quadratic model")
Figure 9: Autocorrelation plot of residuals of quadratic model - There is still a significant correlation at lag 1, 3 and 4, indicating there’s still some trend in the residuals.
#set the cycles frequencies
f = 7
ts_cycle =ts(ts_delta_ozone,frequency =f)
ts_plot_cycle = plot(ts_cycle, type = 'l',
main ="Time series plot of change in thickness of Ozone layer\nin cycle of 7 years",
ylab = "Change in Ozone thickness",
xlab = "cycle = 7 years")
cycle = factor(rep(1:f, length.out = length(ts_cycle)), ordered = TRUE)
points(y=ts_cycle,x=time(ts_cycle), pch=as.character(cycle), col =2, cex = 1.15)
Figure 10: Time series plot of change in thickness of Ozone layer in cycle of 7 years
# Simple Harmoinic model
har.<- harmonic(ts_cycle,1)
harmonic_model =lm(ts_cycle~har.)
summary(harmonic_model)
Call:
lm(formula = ts_cycle ~ har.)
Residuals:
Min 1Q Median 3Q Max
-7.8294 -1.8422 0.7481 2.4701 5.8635
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.1949 0.3700 -8.636 2.5e-13 ***
har.cos(2*pi*t) 0.7386 0.5226 1.413 0.161
har.sin(2*pi*t) -0.2544 0.5239 -0.486 0.628
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.509 on 87 degrees of freedom
Multiple R-squared: 0.02487, Adjusted R-squared: 0.002453
F-statistic: 1.109 on 2 and 87 DF, p-value: 0.3344
# Harmonic model with trend component
linear_harmonic_model =lm(ts_cycle~t+har.)
summary(linear_harmonic_model)
Call:
lm(formula = ts_cycle ~ t + har.)
Residuals:
Min 1Q Median 3Q Max
-4.2187 -1.6011 -0.0105 1.1970 4.2311
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 213.141767 15.988761 13.331 <2e-16 ***
t -0.109732 0.008109 -13.532 <2e-16 ***
har.cos(2*pi*t) 0.560458 0.297409 1.884 0.0629 .
har.sin(2*pi*t) -0.396451 0.298047 -1.330 0.1870
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.995 on 86 degrees of freedom
Multiple R-squared: 0.6884, Adjusted R-squared: 0.6775
F-statistic: 63.32 on 3 and 86 DF, p-value: < 2.2e-16
res.linear_harmonic_model = rstudent(linear_harmonic_model)
shapiro.test(res.linear_harmonic_model)
Shapiro-Wilk normality test
data: res.linear_harmonic_model
W = 0.98238, p-value = 0.2631
h=5 # 5 step ahead forecasting
new_data = data.frame(t = seq((min(t)), (max(t)+h), 1))
new_data$t2 = new_data$t^2
#making the predictions
pred1 = predict(quad_model, new_data, interval = "prediction")
#getting just the predictions for year 2017 - 2021
pred1_table = data.frame(Year = seq(2017, 2021, 1),pred1[91:95,])
colnames(pred1_table) = c("Year", "Prediction", "LowerCI", "UpperCI")
pred1_table
#graphing it out
plot(ts_delta_ozone, type = 'o',
main ="Forecasting 5 years ahead with quadratic model",
ylab = "Change in thickness of Ozone",
xlab = "Year",
xlim=c(1927,2021),
ylim = c(-15,4))
lines(ts(as.vector(pred1[,1]), start = 1927), col="red", type="l")
lines(ts(as.vector(pred1[,2]), start = 1927), col="green", type="l")
lines(ts(as.vector(pred1[,3]), start = 1927), col="green", type="l")
legend("topright", lty=1, pch=1, col=c("black","green","red"), text.width
= 18,
c("Data","5% forecast limits", "Forecasts"))
Figure 11: Forecasting 5 years ahead with quadratic model
From the linear and quadratic regression in Task 1 we have already determined that there is clear downward trend in the data, and this also implies that we have non-stationarity in the data. There will be auto regressive behaviour in the data for which we’ll do further plotting to reaffirm our assumption from initial plots.
#plotting the the acf
acf(ts_delta_ozone, ci.type = 'ma', main = "ACF of yearly change in Ozone thickness")
Figure 12: ACF of yearly change in Ozone thickness
#plotting the the pacf
pacf(ts_delta_ozone, main = "PACF of yearly change in Ozone thickness")
Figure 13: PACF of yearly change in Ozone thickness
# Extracting lag
ar(ts_delta_ozone)
Call:
ar(x = ts_delta_ozone)
Coefficients:
1 2 3 4
0.9807 -0.1417 -0.2581 0.2985
Order selected 4 sigma^2 estimated as 3.242
# Conducting ADF test
adfTest(ts_delta_ozone, lags = 4, type = "nc", title = 'ADF Test: No constant nor time trend')
Title:
ADF Test: No constant nor time trend
Test Results:
PARAMETER:
Lag Order: 4
STATISTIC:
Dickey-Fuller: 0.5699
P VALUE:
0.7942
Description:
Sun Apr 18 16:41:45 2021 by user: shekh
adfTest(ts_delta_ozone, lags = 4, type = "c", title = 'ADF Test: constant no time trend')
Title:
ADF Test: constant no time trend
Test Results:
PARAMETER:
Lag Order: 4
STATISTIC:
Dickey-Fuller: -0.4361
P VALUE:
0.8924
Description:
Sun Apr 18 16:41:45 2021 by user: shekh
adfTest(ts_delta_ozone, lags = 4, type = "ct", title = 'ADF Test: constant and with time trend')
Title:
ADF Test: constant and with time trend
Test Results:
PARAMETER:
Lag Order: 4
STATISTIC:
Dickey-Fuller: -3.2376
P VALUE:
0.0867
Description:
Sun Apr 18 16:41:45 2021 by user: shekh
From the initial plots of the time series (Figure 2) we can see that the variance in the data does not change significantly. But it is worthwhile to do a check whether this inference we made just by looking at the time series is true.
#checking for Box_Cox transformation best lambda
ts_transf <- BoxCox.ar(ts_delta_ozone+13)
title(main = "Log-likelihood versus the values of lambda")
Figure 14: Log-likelihood versus the values of lambda after BoxCox transformation
Since the confidence interval of \(\lambda\) includes the value 1 this suggests that power or log transformation is not required.
We have established that there is trend in the data as well as the fact that the variance is not changing at a significant amount to apply power or log transformation, therefore we take first difference of the data.
Figure 15: First Difference of yearly changes in thickess of Ozone Layer
Call:
ar(x = ts_diff)
Coefficients:
1 2 3 4 5 6
-0.1976 -0.2628 -0.6019 -0.3064 -0.2253 -0.3045
Order selected 6 sigma^2 estimated as 2.232
Title:
ADF Test with No constant nor time trend
Test Results:
PARAMETER:
Lag Order: 6
STATISTIC:
Dickey-Fuller: -5.0757
P VALUE:
0.01
Description:
Sun Apr 18 16:59:05 2021 by user: shekh
Title:
ADF Test with constant no time trend
Test Results:
PARAMETER:
Lag Order: 6
STATISTIC:
Dickey-Fuller: -5.5901
P VALUE:
0.01
Description:
Sun Apr 18 16:59:05 2021 by user: shekh
Title:
ADF Test with constant and with time trend
Test Results:
PARAMETER:
Lag Order: 6
STATISTIC:
Dickey-Fuller: -5.7327
P VALUE:
0.01
Description:
Sun Apr 18 16:59:05 2021 by user: shekh
# Plotting ACF and PACF of differenced data
acf(ts_diff, main = 'ACF of differenced data')
Figure 16: ACF of differenced data
pacf(ts_diff, main = 'PACF of differenced data')
Figure 17: PACF of differenced data
#calculating the eacf
eacf(ts_diff, ar.max = 10, ma.max = 6)
AR/MA
0 1 2 3 4 5 6
0 o o x o o o x
1 x o x o o o o
2 x o x o o o x
3 x o x o o x o
4 x o o x o x o
5 x x x x o x o
6 o o o x x o o
7 o o o x o o o
8 x x o x o o o
9 o x o x o o o
10 x o x o x o o
#creating the BIC plot
There were 18 warnings (use warnings() to see them)
res = armasubsets(ts_diff, nar = 9, nma = 9, y.name='ar', ar.method = 'ols')
Reordering variables and trying again:
plot(res)
title(main = "BIC table of differenced data", line = 6)
Figure 18: BIC table of differenced data #### Observation - The BIC table’s shaded columns correspond to AR(7) and MA(6) therefore a model ARIMA(7,1,6) can also be included as potential model.
Ritchie, Hannah, and Max Roser. 2020. “Ozone Layer.” Our World in Data.