Lab 1 re-do Time series autocorrelation
# install.packages('lmtest') # install on computer
library(lmtest) # load into R
co2data <- read.csv("/Users/caitlin/Dropbox/CAITLINS DOCUMENTS/CU Boulder/Courses/GEOG 5023 Quant methods - Spielman/Labs and Exercises/Lab 1/co2_LAB1.csv",
header = TRUE)
names(co2data)
## [1] "year" "month" "time" "co2" "site"
summary(co2data)
## year month time co2 site
## Min. :1969 Min. : 1.00 Min. :1969 Min. :322 Min. :0.0
## 1st Qu.:1978 1st Qu.: 4.00 1st Qu.:1979 1st Qu.:337 1st Qu.:0.0
## Median :1988 Median : 7.00 Median :1989 Median :351 Median :0.5
## Mean :1988 Mean : 6.51 Mean :1989 Mean :352 Mean :0.5
## 3rd Qu.:1998 3rd Qu.: 9.75 3rd Qu.:1998 3rd Qu.:366 3rd Qu.:1.0
## Max. :2007 Max. :12.00 Max. :2008 Max. :389 Max. :1.0
# split the data
LaJolla <- co2data[co2data$site == 0, ]
head(LaJolla)
## year month time co2 site
## 1 1969 2 1969 324.4 0
## 2 1969 3 1969 325.6 0
## 3 1969 4 1969 326.7 0
## 4 1969 5 1969 327.3 0
## 5 1969 6 1970 326.8 0
## 6 1969 7 1970 325.9 0
sitexlm <- lm(co2 ~ time + I(time^2), data = LaJolla) # Best model from lab 1
dwtest(sitexlm)
##
## Durbin-Watson test
##
## data: sitexlm
## DW = 0.3067, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# p = very small; H0: rho = 0. Reject this. There IS some kind of
# autocorrelation in the residuals.
acf(sitexlm$residuals) # Correlellogram of the residuals. Notice the strong seasonal effect.
sitexlm.ts <- ts(LaJolla$co2, start = min(LaJolla$year), frequency = 12) # Convert a data frame into a time series.
plot(decompose(sitexlm.ts)) # Plot the time series using decompose
# To take account of seasonal effects, add predictor variables for season
Season <- cycle(sitexlm.ts)
Time <- time(sitexlm.ts)
lmx <- lm(LaJolla$co2 ~ LaJolla$time + factor(Season)) #original model plus seasonality. Could do this again for time.
ts.plot(cbind(sitexlm.ts, lmx$fitted), lty = 1:2, col = c(1, 2))
plot(lmx$residuals)
# The model predicts well, but the residuals are still violating
# assumptions - variables are not independent and there is
# autocorrelation. So this raises the question of what is a good model?
# The thing you CANT do with this model is to interpret the coefficients,
# like to say 'adding an additional x will impact the outcome in this
# way...' because you are not certain about the coefficients. But if your
# goal is just prediction, then this is pretty good.