# Reading the file in R
Ozone_layer <- read.csv("~/Desktop/Time Series/Assignment 1/data1.csv",header = FALSE)
# Setting rownames as a sequesnce from 1927 to 2016
rownames(Ozone_layer) <- seq(from=1927, to=2016)
# Checking the class of the dataset
class(Ozone_layer)
## [1] "data.frame"
# Converting the dataframe to time series object
Ozone_layer1 <- ts(as.vector(as.matrix(t(Ozone_layer))), start=1927, end=2016, frequency=1)
# Making sure the data is a class of time series
class(Ozone_layer1)
## [1] "ts"
# Plotting the time series
plot(Ozone_layer1,col=c("blue"),type='o',ylab='Thickness Of Ozone Layer (Dobson Units)',xlab='Time (Years)')
legend( "bottomleft",lty=1, bty = "n" ,text.width = 12, col=c("blue"),
c(" Change in Thickness of Ozone Layer"))
A scatterplot and correlation of neighbouring measurments are shown below:
#ScatterPlot
plot(y=Ozone_layer1,x=zlag(Ozone_layer1),col=c("blue"),xlab = "Previous Year Thickness of Ozone Layer",main = "Scatter PLot of Ozone Layer Thickness")
y<- Ozone_layer1 #Read the abundance data into y
x<- zlag(Ozone_layer1) #Generate first lag of the abundance series
index=2:length(x) # Create an index to get rid of the first NA values
cor(y[index],x[index]) # Calculate correlation between x and y.
## [1] 0.8700381
As expected, we observe high correlation between succeeding thickness of ozone layer over the years.
The deterministic linear trend model is expressed as follows:
\[ \mu_t = \beta_0+\beta_1t\] where: \[ \beta_0 = Intercept~~~~~~\beta_1= Slope~of ~Linear ~Trend \]
##
## Call:
## lm(formula = Ozone_layer1 ~ time(Ozone_layer1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7165 -1.6687 0.0275 1.4726 4.7940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 213.720155 16.257158 13.15 <2e-16 ***
## time(Ozone_layer1) -0.110029 0.008245 -13.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.032 on 88 degrees of freedom
## Multiple R-squared: 0.6693, Adjusted R-squared: 0.6655
## F-statistic: 178.1 on 1 and 88 DF, p-value: < 2.2e-16
From the summary statistics we conclude: - The estimates of slopes and intercepts are -0.110029 and 213.720155. Here the slope is statistically significant at 5% significance level. The trend line is plotted over the time series in the following plot. - According to multiple R^2, about 66.93% of the variation in the Ozone thickness series is explained by the linear trend. - The adjusted version of R^2 provides an approximately unbiased extimate of true r^2. Hence the value is 66.55% in this case.
The estimator or predictor of unobserved stochastic component,
\[ X_t = Y_t-\mu_t \] is called residual corresponding to the t th observation.
res.model1 = rstudent(model1)
plot(y = res.model1, x = as.vector(time(Ozone_layer1)),xlab = 'Time', ylab='Standardized Residuals',type='p',col=c("blue"),main = "Plot of Residuals over Time")
abline(h=0)
#ACF of Standardized Residuals
acf(res.model1,main="ACF of Standardized Residuals",col=c("blue"))
#Histogram
hist(res.model1,xlab='Standardized Residuals')
#QQplot
y=rstudent(model1)
qqnorm(y,col=c("blue"))
qqline(y)
#Shapiro Test
shapiro.test(y)
##
## Shapiro-Wilk normality test
##
## data: y
## W = 0.98733, p-value = 0.5372
The deterministic Quadractic trend model is expressed as follows: \[ \mu_t = \beta_0+\beta_1t+\beta_2t^2\]
where: \[ \beta_0 = Intercept ~~~~ \beta_1 = Linear~Trend ~~~~~\beta_1 = Quadratic~Trend~in~Time\]
t = time(Ozone_layer1)
t2 = t^2
model2 = lm(Ozone_layer1~ t + t2) # label the quadratic trend model as model1
summary(model2)
##
## Call:
## lm(formula = Ozone_layer1 ~ t + t2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1062 -1.2846 -0.0055 1.3379 4.2325
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.733e+03 1.232e+03 -4.654 1.16e-05 ***
## t 5.924e+00 1.250e+00 4.739 8.30e-06 ***
## t2 -1.530e-03 3.170e-04 -4.827 5.87e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.815 on 87 degrees of freedom
## Multiple R-squared: 0.7391, Adjusted R-squared: 0.7331
## F-statistic: 123.3 on 2 and 87 DF, p-value: < 2.2e-16
From the summary statistics we conclude: - According to the p-values, the quadratic trend term is found significant so we reject the null hypothesis and the value of multiple R-squared is 0.7331 which is higher than that of the linear model observed earlier.
Fitted Quadratic Trend is shown below:
plot(ts(fitted(model2)), ylim=c(min(c(fitted(model2),
as.vector(Ozone_layer1))),max(c(fitted(model2),as.vector(Ozone_layer1)))),col=c('red'), ylab='y',
main = "Fitted quadratic curve to Ozone Layer Depletion")
lines(as.vector(Ozone_layer1),type="o",col="blue")
legend ("topright", lty = 1, pch = 1, col = c("blue","red"), text.width = 25,
c("Time Series Plot","Quadratic Trend Line"))
res.model2 = rstudent(model2)
plot(y = res.model2, x = as.vector(time(Ozone_layer1)),xlab = 'Time', ylab='Standardized Residuals',type='p',main = 'Time Series Plot Of Residuals',col=c('blue'))
abline(h=0)
plot(y=rstudent(model2),x=as.vector(time(Ozone_layer1)),xlab='Time',ylab ='Standardized Residuals',type='o',main = 'Time Series Plot Of Residuals',col=c('blue'))
#ACF of Standardized Residuals
acf(rstudent(model2),main="ACF of Standardized Residuals",col=c('blue'))
#Histogram
hist(rstudent(model2),xlab='Standardized Residuals')
#QQplot
z=rstudent(model2)
qqnorm(y,col=c('blue'))
qqline(y,col=c('red'))
#Shapiro Test
shapiro.test(z)
##
## Shapiro-Wilk normality test
##
## data: z
## W = 0.98889, p-value = 0.6493
A Forcasting for next 5 years with quadractic trend:
t = c(2017,2018,2019,2020,2021)
t2 = t^2
Ozone_forcast= data.frame(t,t2)
Forcast= predict(model2,Ozone_forcast,interval ="prediction")
print(Forcast)
## fit lwr upr
## 1 -10.34387 -14.13556 -6.552180
## 2 -10.59469 -14.40282 -6.786548
## 3 -10.84856 -14.67434 -7.022786
## 4 -11.10550 -14.95015 -7.260851
## 5 -11.36550 -15.23030 -7.500701
plot(Ozone_layer1,type='o',xlim = c(1927,2021),ylim=c(-15,10),ylab='Ozone Layer Thickness',main="Time series plot for Ozone Layer Thickness" )
#We need to convert forcast to time series object strating from first
#We do this for all colums of forcast
lines(ts(as.vector(Forcast[,1]),start = 2017),col="red",type="l")
lines(ts(as.vector(Forcast[,2]),start = 2017),col="blue",type="l")
lines(ts(as.vector(Forcast[,3]),start = 2017),col="blue",type="l")
legend("topright", lty=1, pch=1, col=c("black","blue","red"), text.width =24,
c("Data","5% Forecast limits", "Forecasts"))