##Importing Libraries and Data
#Import Libraries
library("TSA")
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library("forecast")
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## fitted.fracdiff fracdiff
## plot.Arima TSA
## residuals.fracdiff fracdiff
library("lmtest")
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("ggplot2")
##Time Series Plot
#Importing Data
ozone=read.csv('data1.csv',col.names = "Thickness of Ozone Layer in Dobson Units",header = FALSE)
class(ozone)
## [1] "data.frame"
#Converting the data into Time Series format
ozone_ts=ts(ozone,start=c(1927,1))
#Visualizing the Time Series Plot
plot(ozone_ts,xlab="Year",ylab="Dobson Units",type='o', main='Time Series Plot of Thickness of Ozone Layer in Dobson Units')
grid()
~The above time series plot shows a moving average trend with a downward behaviour.We see that with years passing by, the thickness of the ozone layer tends to decrease.The thickness was found to be an avergae of 0 dobson units for years 1927-2016 approximately and it went on reducing almost upto -12 dobson units for year 2016.
~In the series we can observer smaller and larger variances.For example, we see smaller variances around years 1920s,1970s, etc and larger variations around 1950s and 1990s.
~The plot does not show any repeating patterns, hence we can say that there is no seasonality or cyclic trend.
~We can also note that there is some downward intervention i.e., the changing points.
#Autocorrelation between consecutive years
#For Explaining Autocorrelation between previous year values and current years.
plot(y=ozone_ts,x=zlag(ozone_ts),ylab = "Dobson Units",xlab = "Previous Year Dobson Units",main = "Scatter Plot of Ozone Thicknes for consecutive years")
grid()
y=ozone_ts #Reading the Data in y
x=zlag(ozone_ts) #Generating first lag for data
index=2:length(x) #Creating index to remove NA Values
cor(y[index],x[index]) #Calculating correlation
## [1] 0.8700381
From the scatter plot we se a strong positive correlation between the ozone layer thickness for consecutive years. We see a correlation of 87% between consecutive years.
##Modeling and Diagnostic Checking #Linear Model
#Linear Model
Ozone_Linear_Model=lm(ozone_ts~time(ozone_ts))
summary(Ozone_Linear_Model)
##
## Call:
## lm(formula = ozone_ts ~ time(ozone_ts))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7165 -1.6687 0.0275 1.4726 4.7940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 213.720155 16.257158 13.15 <2e-16 ***
## time(ozone_ts) -0.110029 0.008245 -13.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.032 on 88 degrees of freedom
## Multiple R-squared: 0.6693, Adjusted R-squared: 0.6655
## F-statistic: 178.1 on 1 and 88 DF, p-value: < 2.2e-16
From the summary of the linear model we see that the p-values for slope and intercept are less than 0.05%, also according to the multiple r-squared, about 66.9% of the variation is explained by the linear time trend.
#Fitting Linear Line in the model
plot(ozone_ts,type='o',ylab='Dobsons Unit',main="Time Series plot")
abline(Ozone_Linear_Model)#Adding the fitted least squares line from the linear model
#Residual Analysis-Linear Model #Standardized Residuals
#Standardized Residuals
plot(y=rstudent(Ozone_Linear_Model),x=as.vector(time(ozone_ts)),xlab='Time',ylab='Standardized Residuals',type='o',main='Time Series Plot of Residuals')
grid()
~From the time Series plot we see that the residuals are between -3 and 3 and hence we can assume that there is no problem with the linear model.
~No possible trend can be seen from the plot.
~Randomness of residuals can be observed.
#QQ Plot
#QQ Plot of Residuals
Ozone_QQ = rstudent(Ozone_Linear_Model)
qqnorm(Ozone_QQ)
qqline(Ozone_QQ,col=2,lwd=1,lty=2)
From the QQ Plot we see that most of the residuals fall exactly on the line, except for few values at the start and end.
#Histogram
#Histogram of Residuals
ozone_hist=hist(rstudent(Ozone_Linear_Model),xlab ='Standardized Residual',main ='Histogram of Standardized Residuals',ylim = c(0,0.5),freq = FALSE)
curve(dnorm(x, mean=mean(rstudent(Ozone_Linear_Model)), sd=sd(rstudent(Ozone_Linear_Model))), col="blue", lwd=2, add=TRUE, yaxt="n")
From the Histogram we can see that the standardised residuals are normally distributed within the range -3 and 3 and can also be seen that it is nearly symmetric.
#Shapiro-Wilk Test
#Hypothesis Testing
#Shapiro-Wilk Test
shapiro.test(rstudent(Ozone_Linear_Model))
##
## Shapiro-Wilk normality test
##
## data: rstudent(Ozone_Linear_Model)
## W = 0.98733, p-value = 0.5372
We get the p-value of 0.5372, which is greater tha 0.05.Hence we fail to reject the null hypothesis and the stochastic component of this model is distributed normally and state the independence of stochastic component.
#Sample Autocorrelation function
#Sample Autocorrelation function
acf(rstudent(Ozone_Linear_Model), main = 'ACF of Standardized residuals',lag.max = 50)
From the acf plot we observe significant autocorrelation values higher than the confidence bound at 4 lags.
#Quadratic Model
#Quadratic Trend Model
t=time(ozone_ts)
t2=t^2
Ozone_Quadratic_Model=lm(ozone_ts~t+t2)
summary(Ozone_Quadratic_Model)
##
## Call:
## lm(formula = ozone_ts ~ t + t2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1062 -1.2846 -0.0055 1.3379 4.2325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.733e+03 1.232e+03 -4.654 1.16e-05 ***
## t 5.924e+00 1.250e+00 4.739 8.30e-06 ***
## t2 -1.530e-03 3.170e-04 -4.827 5.87e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.815 on 87 degrees of freedom
## Multiple R-squared: 0.7391, Adjusted R-squared: 0.7331
## F-statistic: 123.3 on 2 and 87 DF, p-value: < 2.2e-16
From the summary of the Quadratic Trend model we see that the p-values for intercept,linear trend term and quadratic trend term are less than 0.05%, hence we can say that the model is significant with a good r-squared value at 73.31%, which is slightly better than the linear model.
#Fitted quadtraic trend is given as:
plot(ts(fitted(Ozone_Quadratic_Model)),
ylim=c(min(c(fitted(Ozone_Quadratic_Model),as.vector(ozone_ts))),
max(c(fitted(Ozone_Quadratic_Model),as.vector(ozone_ts)))),
ylab='Dobson Units',xlab='Timme in Years',main='Change in Thickness of Ozone Layer')
lines(as.vector(ozone_ts),type = "o")
#Residual Analysis-Quadratic Trend Model #Standardized Residuals
#Standardized Residuals
plot(y=rstudent(Ozone_Quadratic_Model),x=as.vector(time(ozone_ts)),xlab='Time',ylab='Standardized Residuals',type='o',main='Time Series Plot of Residuals')
grid()
~From the time Series plot we see that the residuals are between -3 and 3 and hence we can assume that there is no problem with the Qyadratic model.
~No possible trend can be seen from the plot.
~Randomness of residuals can be observed.
#QQ Plot
#QQ Plot of Residuals
Ozone_QQ = rstudent(Ozone_Quadratic_Model)
qqnorm(Ozone_QQ)
qqline(Ozone_QQ,col=2,lwd=1,lty=2)
From the QQ Plot we see that most of the residuals fall exactly on the line, except for few values at the start and end.
#Histogram
#Histogram of Residuals
ozone_hist=hist(rstudent(Ozone_Quadratic_Model),xlab ='Standardized Residual',main ='Histogram of Standardized Residuals',ylim = c(0,0.5),freq = FALSE)
curve(dnorm(x, mean=mean(rstudent(Ozone_Quadratic_Model)), sd=sd(rstudent(Ozone_Quadratic_Model))), col="green", lwd=2, add=TRUE, yaxt="n")
From the Histogram we can see that the standardised residuals are normally distributed within the range -3 and 3 and can also be seen that it is nearly symmetric.
#Shapiro-Wilk Test
#Hypothesis Testing
#Shapiro-Wilk Test
shapiro.test(rstudent(Ozone_Quadratic_Model))
##
## Shapiro-Wilk normality test
##
## data: rstudent(Ozone_Quadratic_Model)
## W = 0.98889, p-value = 0.6493
We get the p-value of 0.6493, which is greater tha 0.05.Hence we fail to reject the null hypothesis and the stochastic component of this model is distributed normally and state the independence of stochastic component.
#Sample Autocorrelation function
#Sample Autocorrelation function
acf(rstudent(Ozone_Quadratic_Model), main = 'ACF of Standardized residuals',lag.max = 50)
From the acf plot we observe significant autocorrelation values higher than the confidence bound at 6 lags.
##Forecasting
#Forecasting
h = 5
x = seq((as.vector(tail(t,1))+1), (as.vector(tail(t,1))+h))
pred = data.frame(t = x, t2 = x^2)
forecasts = predict(Ozone_Quadratic_Model, pred, interval = "prediction")
print(forecasts)
## fit lwr upr
## 1 -10.34387 -14.13556 -6.552180
## 2 -10.59469 -14.40282 -6.786548
## 3 -10.84856 -14.67434 -7.022786
## 4 -11.10550 -14.95015 -7.260851
## 5 -11.36550 -15.23030 -7.500701
plot(ozone_ts, xlim = c(1927,2021), ylim = c(-15,5),
ylab = 'Dobson Units',
xlab = 'Years',
main = 'Ozone Layer Thickness')
lines(ts(as.vector(forecasts[,1]), start = 2017), col="red", type="l")
lines(ts(as.vector(forecasts[,2]), start = 2017), col="blue", type="l")
lines(ts(as.vector(forecasts[,3]), start = 2017), col="blue", type="l")
legend("topright", lty=1, pch=1, col=c("black","blue","red"), text.width = 20,
c("Data","5% forecast limits", "Forecasts"),
bty = 'n', cex=0.8,lwd=1, y.intersp = 0.8)
grid()
We see that the forecast is following the downward trend pattern perfectly with 5% forecast levels.
##Summary In this task, we analysed the thickness of the ozone layer time series data through simple visualizations.We saw a strong positive correlation of 87% between the ozone layer thickness for consecutive years. We then applied Linear and Quadratic Model on the data to see it’s accuracy.Since the plot did not show any repeating patterns,seasonality or cyclic trend, we did not apply harmonic and cosine models on the data.Then we performed Diagnostic Checking on residuals of the models using QQ plot, Histogram, Shapiro-Wilk Test and AcF functions.Since, Quadratic Model was able to capture most of the time series data and fitted properly, future values for next 5 years are predicted using Forecasting.