library(ggplot2)
library(readxl)
# ---- Table B.4 (Exercise 2.2) ----
# The workbook is read via a relative path, so it must sit in the current
# working directory. The result is kept as a plain data frame.
AppendixB_4 <- read_excel("B4_Data.xlsx")
my.data <- as.data.frame(AppendixB_4)
# Show the headers as imported, then swap them for short names that are
# easier to type.
names(my.data)
## [1] "Year" "Production, thousand lbs"
names(my.data) <- c("Year", "Production")

# ---- Table B.6 (Exercise 2.2) ----
AppendixB_6 <- read_excel("B6_Data.xlsx")
my.data2 <- as.data.frame(AppendixB_6)
# Headers as imported.
names(my.data2)
## [1] "Year" "Anomaly, C" "CO2, ppmv"
names(my.data2) <- c("Year", "Anomaly", "CO2")
# Confirm the rename took effect.
names(my.data2)
## [1] "Year" "Anomaly" "CO2"
#' Standardised sample variogram of a series
#'
#' For each lag k = 1, ..., floor(lag), takes the sample variance of the
#' lag-k differences of `x` and divides it by the lag-1 variance, so the
#' first value is always 1.
#'
#' @param x Numeric vector holding the series in time order.
#' @param lag Largest lag to evaluate. Fractional values are truncated
#'   (the same lags `1:lag` would visit); values below 1 give a zero-row
#'   result instead of an error.
#' @return A data frame with columns `Lag` and `vario`, one row per lag.
#'   `vario` is `NA` where a lag leaves fewer than two differences.
variogram <- function(x, lag) {
  stopifnot(is.numeric(lag), length(lag) == 1L, !is.na(lag))
  # seq_len() avoids the 1:0 trap, where `1:lag` would iterate over c(1, 0)
  # and fail inside diff(); floor() makes the truncation explicit.
  lags <- seq_len(max(0, floor(lag)))
  # One variance per lag, computed without growing vectors inside a loop.
  diff_var <- vapply(lags, function(k) var(diff(x, lag = k)), numeric(1))
  data.frame(Lag = as.numeric(lags), vario = diff_var / diff_var[1])
}
#*********************************************************************************************
Table B.6 contains two time series: the global mean surface air temperature anomaly and the global CO2 concentration. Find the sample autocorrelation function and the variogram for both of these time series. Is either one of the time series stationary?
# Time plot of the temperature anomaly with a dashed red least-squares line.
# The y-axis label previously read "Production, 10000lb" (left over from the
# cheese-production exercise); it now names the plotted variable and the
# unit given in the imported column header ("Anomaly, C").
p <- ggplot(my.data2, aes(x = Year, y = Anomaly))
p +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE, colour = "red", linetype = 2) +
  labs(
    x = "Year",
    y = "Anomaly, \u00B0C",
    title = "Variation of Global air temperature anomaly"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'
Figure 5: Variation of the global mean surface air temperature anomaly in °C from 1880 to 2004. The red dashed line shows the fitted regression line.
# Histogram of the temperature anomaly in 10 bins.
# The x-axis label previously read "Year", but the variable on that axis is
# the anomaly itself.
ggplot(my.data2, aes(x = Anomaly)) +
  geom_histogram(bins = 10, color = "black", fill = "white") +
  labs(
    x = "Anomaly, \u00B0C",
    y = "Frequency",
    title = "Histogram of air temperature anomaly"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
Figure 6: Histogram of Global air temperature anomaly
# Sample ACF of the anomaly series (second column of my.data2); the largest
# lag requested is a quarter of the number of observations.
acf(
  x = my.data2[, 2],
  lag.max = nrow(my.data2) / 4,
  type = "correlation",
  main = "ACF of global temp anomaly"
)
Figure 7: Sample autocorrelation function (ACF) for the global surface temperature anomaly for 1880-2004
Conclusion: A time series can be called stationary if its mean is fixed and its variance is constant. From the ACF plot above, we can clearly say that this time series is stationary, as the ACF values decrease to zero as the lag increases. In addition, there is a mild sinusoidal pattern, which is one of the characteristics of a typical time series plot.
# Variogram of the anomaly series (second column of my.data2), evaluated up
# to a quarter of the series length and drawn as a bar chart.
# The title previously read "Variogram of US cheeses production", a leftover
# from the cheese-production exercise.
variogram_Anomaly <- variogram(my.data2[, 2], length(my.data2[, 2]) / 4)
ggplot(variogram_Anomaly, aes(x = Lag, y = vario)) +
  geom_col() +
  labs(
    x = "Lag",
    y = "Variogram",
    title = "Variogram of global air temperature anomaly"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
Figure 8: variogram of the global surface temperature anomaly
Conclusion: If the ACF alone does not show whether the data are stationary, we can analyze further with the variogram. If the variogram varies around a constant value, the time series is stationary. From the variogram values obtained, we can clearly say that the variogram stays constant as the lag increases. So, the time series is stationary.
We obtained similar results using JMP software. From the ACF values, we assumed that the time series was stationary, and further analysis with the variogram led to the clear conclusion that this is a stationary time series.
# Time plot of the CO2 concentration with a dashed red least-squares line.
# The y-axis label previously read "Production, 10000lb" (left over from the
# cheese-production exercise); it now uses the variable and unit from the
# imported column header ("CO2, ppmv").
p <- ggplot(my.data2, aes(x = Year, y = CO2))
p +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE, colour = "red", linetype = 2) +
  labs(
    x = "Year",
    y = "CO2, ppmv",
    title = "Variation of the global CO2 concentration"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
## `geom_smooth()` using formula 'y ~ x'
Figure 9: Variation of the global CO2 concentration from 1880 to 2004. The red dashed line shows the fitted regression line.
# Histogram of the CO2 concentration in 10 bins.
# The x-axis label previously read "Year", but the variable on that axis is
# the CO2 concentration (unit taken from the imported header "CO2, ppmv").
ggplot(my.data2, aes(x = CO2)) +
  geom_histogram(bins = 10, color = "black", fill = "white") +
  labs(
    x = "CO2, ppmv",
    y = "Frequency",
    title = "Histogram of the global CO2 concentration"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
Figure 10: Histogram of the global CO2 concentration
# Sample ACF of the CO2 series (third column of my.data2); the largest lag
# requested is a quarter of the number of observations.
acf(
  x = my.data2[, 3],
  lag.max = nrow(my.data2) / 4,
  type = "correlation",
  main = "ACF of global CO2 concentration"
)
Figure 11: Sample autocorrelation function (ACF) for the global CO2 concentration
Conclusion: A time series can be called stationary if its mean is fixed and its variance is constant. From the ACF plot above, we can clearly say that this time series is not stationary, as the ACF values do not decrease to zero as the lag increases. In addition, no mild sinusoidal pattern is present.
# Variogram of the CO2 series (third column of my.data2), evaluated up to a
# quarter of the series length and drawn as a bar chart.
# The title previously read "Variogram of US cheeses production", a leftover
# from the cheese-production exercise.
variogram_CO2 <- variogram(my.data2[, 3], length(my.data2[, 3]) / 4)
ggplot(variogram_CO2, aes(x = Lag, y = vario)) +
  geom_col() +
  labs(
    x = "Lag",
    y = "Variogram",
    title = "Variogram of the global CO2 concentration"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = rel(1.5), face = "bold", hjust = 0.5),
    axis.title = element_text(size = rel(1.5)),
    legend.position = "bottom"
  )
Figure 12: Variogram of the global CO2 concentration for 1880-2004
Conclusion: If the ACF alone does not show whether the data are stationary, we can analyze further with the variogram. If the variogram varies around a constant value, the time series is stationary. From the variogram values obtained, we can clearly say that the variogram increases as the lag increases. So, the time series is not stationary.
We obtained similar results using JMP software. From the ACF values, we assumed that the time series was not stationary, and further analysis with the variogram led to the clear conclusion that this is a non-stationary time series.