Synopsis

In this document, I create a datset with two time-series about global climate trends and prove their covariance. The first time-series is C02 levels and the second has global annual Temerature averages and covers the same years (1958-2011).

Data Acquisition and manipulation

tempURL <- "http://climate.nasa.gov/system/internal_resources/details/original/647_Global_Temperature_Data_File.txt"
co2URL <- "ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt"

Datasets used for this analysis:

  • The temperature dataset has been provided by NASA Earth Science communication team and can be downloaded from http://climate.nasa.gov/vital-signs/global-temperature/.

  • The co2 dataset has been provided by NOAA and describes monthly mean carbon dioxide measured in parts per million at Mauna Loa Observatory, Hawaii

dat <- readLines(tempURL, encoding = "UTF16LE") # data is downloaded from the URL specified above
dat_cut <- strsplit(dat, "    ")[5:140] # lines need to be split by a 4-space delimeter

names <- c("Year",  "Annual_Mean", "5-year_Mean") # prepparing a vector of names for the final data frame
options(warn=-1) # following line produces watrning because not all expressions can be converted to numeric, supressing warnings
temps <- as.data.frame(do.call(rbind, lapply(dat_cut, as.numeric))[,1:3]) #creating a final data frame with value as numeric
options(warn=0) #turning on warnings again
colnames(temps) <- names # assigning vector of names

Here we construct the temps dataset by downloading the data file and converting it into a R data frame.

head(temps) #taking look at the first 5 rows of the temp dataset
##   Year Annual_Mean 5-year_Mean
## 1 1880       -0.19          NA
## 2 1881       -0.10          NA
## 3 1882       -0.08       -0.16
## 4 1883       -0.19       -0.19
## 5 1884       -0.26       -0.22
## 6 1885       -0.30       -0.27

Preview of the temps dataset

dat <- readLines(co2URL, )[73:776] # downloading the data file from URL above
dat_cut <- strsplit(dat, "    ") # splitting lines of the data file according to a tab delimeter

co2 <- as.data.frame(do.call(rbind, lapply(dat_cut, as.character))[,c(1,3)]) # creating a data frame with with the values as numeric
names(co2) <- c("year month", "averageco2") #assigning names to the columns of the data frame

co2$year <- sapply(lapply(strsplit(as.character(co2$`year month`),"  "), function(l) l[[1]]),as.numeric) #extracting year from the year month column
co2$month <- sapply(lapply(strsplit(as.character(co2$`year month`),"  "), function(l) l[[2]]),as.numeric) # extracting month from the year month column
co2$`year month` <- NULL # deleting the year month column
co2$averageco2 <- as.numeric(gsub("^\\s+|\\s+$", "", as.character(co2$averageco2))) # removing leading and railing spaces from the averageco2 columns
co2agg <- aggregate(averageco2 ~ year,co2, FUN = mean) # aggregating the dataset to get yearly averages from monthly data
head(co2agg) #taking look at the first 5 rows of the co2 dataset
##   year averageco2
## 1 1958   232.2670
## 2 1959   315.9742
## 3 1960   316.9075
## 4 1961   317.6375
## 5 1962   318.4508
## 6 1963   318.9942

Preview of the aggregated co2 dataset

joined <- merge(temps[,1:2], co2agg, by.x = "Year", by.y ="year")
rownames(joined) <- joined$Year
joined$Year <- NULL

Creating a joined dataset.

head(joined)
##      Annual_Mean averageco2
## 1958        0.06   232.2670
## 1959        0.03   315.9742
## 1960       -0.03   316.9075
## 1961        0.05   317.6375
## 1962        0.02   318.4508
## 1963        0.06   318.9942

Time-series pltting

Temerature <- as.ts(joined$Annual_Mean)
co2level <- as.ts(joined$averageco2)                    

Creating time-series.

library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
tsdisplay(Temerature)

tsdisplay(co2level)

Linear regression

library(ggplot2)
ggplot(joined,aes(x=Annual_Mean, y=averageco2)) + geom_point() + ggtitle(label="Annual mean temperatures vs. CO2 particles") + geom_smooth(method = "lm", span=0.9)

summary(lm(Annual_Mean ~ averageco2,joined))
## 
## Call:
## lm(formula = Annual_Mean ~ averageco2, data = joined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30400 -0.11443 -0.01301  0.08370  0.51937 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.9801757  0.1972128  -10.04 3.94e-14 ***
## averageco2   0.0065477  0.0005663   11.56  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1513 on 56 degrees of freedom
## Multiple R-squared:  0.7048, Adjusted R-squared:  0.6995 
## F-statistic: 133.7 on 1 and 56 DF,  p-value: < 2.2e-16

Cointegration

Temerature <- as.ts(joined$Annual_Mean)
co2level <- as.ts(joined$averageco2)                    

1.check for unit root of both time-series

library(fUnitRoots)
adfTest(Temerature, 10)
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 10
##   STATISTIC:
##     Dickey-Fuller: 1.4771
##   P VALUE:
##     0.9621 
## 
## Description:
##  Thu Nov 24 19:52:51 2016 by user:
adfTest(co2level, 10)
## Warning in adfTest(co2level, 10): p-value greater than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 10
##   STATISTIC:
##     Dickey-Fuller: 5.8784
##   P VALUE:
##     0.99 
## 
## Description:
##  Thu Nov 24 19:52:51 2016 by user:
engle <- lm(Temerature~co2level) 
summary(engle)
## 
## Call:
## lm(formula = Temerature ~ co2level)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30400 -0.11443 -0.01301  0.08370  0.51937 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.9801757  0.1972128  -10.04 3.94e-14 ***
## co2level     0.0065477  0.0005663   11.56  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1513 on 56 degrees of freedom
## Multiple R-squared:  0.7048, Adjusted R-squared:  0.6995 
## F-statistic: 133.7 on 1 and 56 DF,  p-value: < 2.2e-16

Cointegrated!