CO2 <- read.csv("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/co2_LAB1.csv",
header = T)
mauna <- subset(CO2, site == 0)
mauna$cent <- mauna$time - mean(mauna$time)
lm2 <- lm(co2 ~ cent + I(cent^2), mauna)
summary(lm2)
##
## Call:
## lm(formula = co2 ~ cent + I(cent^2), data = mauna)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.061 -1.766 0.155 1.836 4.419
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.50e+02 1.54e-01 2273.7 <2e-16 ***
## cent 1.54e+00 9.14e-03 168.9 <2e-16 ***
## I(cent^2) 9.92e-03 9.09e-04 10.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.22 on 464 degrees of freedom
## Multiple R-squared: 0.984, Adjusted R-squared: 0.984
## F-statistic: 1.43e+04 on 2 and 464 DF, p-value: <2e-16
plot(lm2$residuals ~ lm2$fitted.values, main = "Residuals for Quadratic Model",
xlab = "Fitted Values (ppm)", ylab = "Residuals")
plot(co2 ~ cent, mauna, main = "CO2 Concentration Quadratic Model", xlab = "Centered Years (1969-2007)",
ylab = "CO2 Concentration (ppm)", cex = 0.6)
curve(predict(lm2, data.frame(cent = x), type = "resp"), add = TRUE, col = "red",
lwd = 3)
library(lmtest)
## Loading required package: zoo
## Attaching package: 'zoo'
## The following object(s) are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Null Hypothesis: rho = 0; there is only random error and no
# autocorrelation
dwtest(lm2) # DW = 0.3067, p-value < 2.2e16
##
## Durbin-Watson test
##
## data: lm2
## DW = 0.3067, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# This tells us that there is autocorrelation.
acf(lm2$residuals, main = "Correllelogram of lm2 Residuals") # That is some fine seasonality.
# Create a time series using ts()
lm2.ts <- ts(mauna$co2, start = min(mauna$year), frequency = 12)
# start= indicates the first observation time unit. start=min(mauna$year)
# indicates that we should start in the minimum year (1969). frequency=
# indicates the number of observations per unit of time. (In this case we
# have 12.)
head(mauna)
## year month time co2 site cent
## 1 1969 2 1969 324.4 0 -19.42
## 2 1969 3 1969 325.6 0 -19.33
## 3 1969 4 1969 326.7 0 -19.25
## 4 1969 5 1969 327.3 0 -19.17
## 5 1969 6 1970 326.8 0 -19.08
## 6 1969 7 1970 325.9 0 -19.00
# Plot using decompose will separate a time series into trend, seasonal,
# and random parts.
plot(decompose(lm2.ts))
# Cycle() assigns values for each observation of each time unit (the
# seasonal effect).
season <- cycle(lm2.ts) # In this case, we're using months in a time unit of years.
head(season)
## [1] 1 2 3 4 5 6
# Compare to the time() function, which treats observations as fractions
# of the time unit
time <- time(lm2.ts) # In this case,0.083333 for each month
head(time)
## [1] 1969 1969 1969 1969 1969 1969
# Refitting the model with factor(season) as a dummy.
lm3 <- lm(mauna$co2 ~ mauna$cent + I(mauna$cent^2) + factor(season))
summary(lm3) # adj. R^2 = 0.9983
##
## Call:
## lm(formula = mauna$co2 ~ mauna$cent + I(mauna$cent^2) + factor(season))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8937 -0.5283 0.0676 0.4837 1.7820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.51e+02 1.21e-01 2901.81 < 2e-16 ***
## mauna$cent 1.55e+00 2.96e-03 522.78 < 2e-16 ***
## I(mauna$cent^2) 9.92e-03 2.94e-04 33.69 < 2e-16 ***
## factor(season)2 8.01e-01 1.63e-01 4.92 1.2e-06 ***
## factor(season)3 1.92e+00 1.63e-01 11.83 < 2e-16 ***
## factor(season)4 2.30e+00 1.63e-01 14.14 < 2e-16 ***
## factor(season)5 1.63e+00 1.63e-01 10.00 < 2e-16 ***
## factor(season)6 -2.12e-02 1.63e-01 -0.13 0.9
## factor(season)7 -2.18e+00 1.63e-01 -13.38 < 2e-16 ***
## factor(season)8 -3.90e+00 1.63e-01 -23.97 < 2e-16 ***
## factor(season)9 -3.94e+00 1.63e-01 -24.25 < 2e-16 ***
## factor(season)10 -2.79e+00 1.63e-01 -17.16 < 2e-16 ***
## factor(season)11 -1.59e+00 1.63e-01 -9.75 < 2e-16 ***
## factor(season)12 -6.69e-01 1.64e-01 -4.09 5.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.718 on 453 degrees of freedom
## Multiple R-squared: 0.998, Adjusted R-squared: 0.998
## F-statistic: 2.13e+04 on 13 and 453 DF, p-value: <2e-16
# Why isn't June significant? I don't know, but the mdoel does fit better.
par(mfrow = c(1, 2))
plot(lm2$residuals ~ lm2$fitted.values, main = "Residuals for Quadratic Model",
xlab = "Fitted Values (ppm)", ylab = "Residuals")
plot(lm3$residuals ~ lm3$fitted.values, main = "Residuals for Seasonal Quadratic Model",
xlab = "Fitted Values (ppm)", ylab = "Residuals")
Honestly, the residuals look more patterned now. Dang.
par(mfrow = c(1, 2))
plot(co2 ~ cent, mauna, main = "CO2 Concentration Quadratic Model", xlab = "Centered Years (1969-2007)",
ylab = "CO2 Concentration (ppm)", type = "l")
curve(predict(lm2, data.frame(cent = x), type = "resp"), add = TRUE, col = "red",
lwd = 3)
ts.plot(cbind(lm2.ts, lm3$fitted), lty = 1:2, col = c(1, 3), lwd = c(1, 2),
main = "CO2 Concentration Seasonal Quadratic Model", ylab = "CO2 Concentration (ppm)",
xlab = "Years")
library(lmtest)
# Null Hypothesis: rho = 0; there is only random error and no
# autocorrelation
dwtest(lm3) # DW = 0.1761, p-value < 2.2e16
##
## Durbin-Watson test
##
## data: lm3
## DW = 0.1761, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# This tells us that there is *still* autocorrelation.
par(mfrow = c(1, 2))
acf(lm2$residuals, main = "Correllelogram of lm2 Residuals")
acf(lm3$residuals, main = "Correllelogram of lm3 Residuals")
There is still autocorrelation, but we've taken care of seasonality.
# The Cochrane-Orcutt Procedure attempts to correct for the
# autocorrelation. It also returns a value for rho. Note that to read
# the output, you just type the object name; don't use the summary()
# function.
library(orcutt)
lm4 <- cochrane.orcutt(lm2)
lm4 # Gives a rho of 0.8466136; the new model has an adj. R^2 of 0.9995
## $Cochrane.Orcutt
##
## Call:
## lm(formula = YB ~ XB - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.732 -0.898 0.516 0.884 2.175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## XB(Intercept) 3.50e+02 5.34e-01 655.21 <2e-16 ***
## XBcent 1.54e+00 3.20e-02 48.28 <2e-16 ***
## XBI(cent^2) 9.77e-03 3.18e-03 3.08 0.0022 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.18 on 463 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 3.25e+05 on 3 and 463 DF, p-value: <2e-16
##
##
## $rho
## [1] 0.8466
##
## $number.interaction
## [1] 1