Time series autocorrelation

Feb 26, 2013

# install.packages('lmtest') # install on computer (only need to do this
# once EVER on my computer)
library(lmtest)  # load into R

# Load the CO2 observations used in Lab 1.
#
# The CSV location defaults to the original author's Dropbox path, which only
# exists on that machine (reading it anywhere else fails with "cannot open
# file"). Set the CO2_LAB1_CSV environment variable to point at a local copy
# of co2_LAB1.csv instead of editing this script.
co2_csv <- Sys.getenv(
  "CO2_LAB1_CSV",
  unset = "/Users/caitlin/Dropbox/CAITLINS DOCUMENTS/CU Boulder/Courses/GEOG 5023 Quant methods - Spielman/Labs and Exercises/Lab 1/co2_LAB1.csv"
)

# Stop here with an actionable message; otherwise every later step fails with
# an unhelpful "object 'co2data' not found" error.
if (!file.exists(co2_csv)) {
  stop(
    "CO2 data file not found: ", co2_csv,
    "\nSet the CO2_LAB1_CSV environment variable to the path of co2_LAB1.csv.",
    call. = FALSE
  )
}

co2data <- read.csv(co2_csv, header = TRUE)

# Quick look at what was read: column names and per-column summaries.
names(co2data)
summary(co2data)

# Split the data: keep only the rows whose site code is 0 and store them as
# LaJolla.
#
# which() drops rows whose site is NA. Indexing with the bare logical vector
# would turn each NA into a row filled entirely with NA, and then
# min(LaJolla$year) in the time-series step below would be NA as well.
LaJolla <- co2data[which(co2data$site == 0), ]

# Show the first few rows as a sanity check on the subset.
head(LaJolla)

# Best model from lab 1: co2 regressed on time and time squared, fitted on the
# LaJolla subset.
sitexlm <- lm(formula = co2 ~ time + I(time^2), data = LaJolla)

# Durbin-Watson test on the fitted model. H0: rho = 0, i.e. no autocorrelation
# in the residuals ("greater" is dwtest()'s default alternative, written out).
# Result noted in the lab: p = very small, so reject H0. There IS some kind of
# autocorrelation in the residuals.
dwtest(formula = sitexlm, alternative = "greater")

# Correlogram of the residuals. Notice the strong seasonal effect.
acf(x = sitexlm$residuals)

# Convert the co2 column of the data frame into a time series with 12
# observations per unit of time, starting at period 1 of the earliest year in
# the data. (Despite the name, sitexlm.ts holds the raw co2 series, not a
# fitted model.)
sitexlm.ts <- with(
  LaJolla,
  ts(co2, start = c(min(year), 1), frequency = 12)
)

# Plot the time series using decompose.
plot(decompose(sitexlm.ts))

# To take account of seasonal effects, add predictor variables for season:
# cycle() gives each observation's position within the 12-period cycle.
Season <- cycle(sitexlm.ts)

# Time at which each observation was sampled, on the series' own time axis.
Time <- time(sitexlm.ts)

# Trend model plus seasonality: co2 regressed on time and one factor level per
# position in the seasonal cycle. Season is taken from the workspace (it is
# the cycle() of sitexlm.ts), while co2 and time are columns of LaJolla.
# Using `data = LaJolla` rather than `LaJolla$...` inside the formula keeps
# the coefficient names clean and lets predict() accept `newdata`.
# NOTE(review): unlike sitexlm, this model has no I(time^2) term even though
# the lab notes call it the "original model plus seasonality" -- confirm that
# is intended. The same could be done again using Time.
lmx <- lm(co2 ~ time + factor(Season), data = LaJolla)

# Observed series (solid, colour 1) against the fitted values (dashed,
# colour 2). fitted() replaces `lmx$fitted`, which only worked through partial
# matching of the `fitted.values` component.
ts.plot(cbind(sitexlm.ts, fitted(lmx)), lty = 1:2, col = c(1, 2))

# Residuals in observation order.
plot(residuals(lmx))

# The model predicts well, but the residuals still violate the regression
# assumptions: they are not independent, and there is autocorrelation. So
# this raises the question of what a good model is.
# The thing you CAN'T do with this model is interpret the coefficients, e.g.
# say 'adding an additional x will impact the outcome in this way...',
# because you are not certain about the coefficients. But if your goal is
# just prediction, then this is pretty good.