Lab 1 re-do Time series autocorrelation

Feb 26, 2013

# install.packages('lmtest') # install on computer
library(lmtest)  # load into R

co2data <- read.csv("/Users/caitlin/Dropbox/CAITLINS DOCUMENTS/CU Boulder/Courses/GEOG 5023 Quant methods - Spielman/Labs and Exercises/Lab 1/co2_LAB1.csv", 
    header = TRUE)
names(co2data)

## [1] "year"  "month" "time"  "co2"   "site"

summary(co2data)

##       year          month            time           co2           site    
##  Min.   :1969   Min.   : 1.00   Min.   :1969   Min.   :322   Min.   :0.0  
##  1st Qu.:1978   1st Qu.: 4.00   1st Qu.:1979   1st Qu.:337   1st Qu.:0.0  
##  Median :1988   Median : 7.00   Median :1989   Median :351   Median :0.5  
##  Mean   :1988   Mean   : 6.51   Mean   :1989   Mean   :352   Mean   :0.5  
##  3rd Qu.:1998   3rd Qu.: 9.75   3rd Qu.:1998   3rd Qu.:366   3rd Qu.:1.0  
##  Max.   :2007   Max.   :12.00   Max.   :2008   Max.   :389   Max.   :1.0


# split the data
LaJolla <- co2data[co2data$site == 0, ]
head(LaJolla)

##   year month time   co2 site
## 1 1969     2 1969 324.4    0
## 2 1969     3 1969 325.6    0
## 3 1969     4 1969 326.7    0
## 4 1969     5 1969 327.3    0
## 5 1969     6 1970 326.8    0
## 6 1969     7 1970 325.9    0


sitexlm <- lm(co2 ~ time + I(time^2), data = LaJolla)  # Best model from lab 1
dwtest(sitexlm)

## 
##  Durbin-Watson test
## 
## data:  sitexlm 
## DW = 0.3067, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# p = very small; H0: rho = 0. Reject this. There IS some kind of
# autocorrelation in the residuals.

acf(sitexlm$residuals)  # Correlellogram of the residuals. Notice the strong seasonal effect.

plot of chunk unnamed-chunk-1


sitexlm.ts <- ts(LaJolla$co2, start = min(LaJolla$year), frequency = 12)  # Convert a data frame into a time series.
plot(decompose(sitexlm.ts))  # Plot the time series using decompose

plot of chunk unnamed-chunk-1


# To take account of seasonal effects, add predictor variables for season
Season <- cycle(sitexlm.ts)

Time <- time(sitexlm.ts)

lmx <- lm(LaJolla$co2 ~ LaJolla$time + factor(Season))  #original model plus seasonality. Could do this again for time.
ts.plot(cbind(sitexlm.ts, lmx$fitted), lty = 1:2, col = c(1, 2))

plot of chunk unnamed-chunk-1

plot(lmx$residuals)

plot of chunk unnamed-chunk-1

# The model predicts well, but the residuals are still violating
# assumptions - variables are not independent and there is
# autocorrelation. So this raises the question of what is a good model?
# The thing you CANT do with this model is to interpret the coefficients,
# like to say 'adding an additional x will impact the outcome in this
# way...' because you are not certain about the coefficients. But if your
# goal is just prediction, then this is pretty good.