Exercise from Lecture 7

Material from Lab 1

CO2 <- read.csv("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/co2_LAB1.csv", 
    header = T)
mauna <- subset(CO2, site == 0)

mauna$cent <- mauna$time - mean(mauna$time)
lm2 <- lm(co2 ~ cent + I(cent^2), mauna)
summary(lm2)

## 
## Call:
## lm(formula = co2 ~ cent + I(cent^2), data = mauna)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.061 -1.766  0.155  1.836  4.419 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.50e+02   1.54e-01  2273.7   <2e-16 ***
## cent        1.54e+00   9.14e-03   168.9   <2e-16 ***
## I(cent^2)   9.92e-03   9.09e-04    10.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 2.22 on 464 degrees of freedom
## Multiple R-squared: 0.984,   Adjusted R-squared: 0.984 
## F-statistic: 1.43e+04 on 2 and 464 DF,  p-value: <2e-16

plot(lm2$residuals ~ lm2$fitted.values, main = "Residuals for Quadratic  Model", 
    xlab = "Fitted Values (ppm)", ylab = "Residuals")

plot of chunk unnamed-chunk-2

plot(co2 ~ cent, mauna, main = "CO2 Concentration Quadratic Model", xlab = "Centered Years (1969-2007)", 
    ylab = "CO2 Concentration (ppm)", cex = 0.6)
curve(predict(lm2, data.frame(cent = x), type = "resp"), add = TRUE, col = "red", 
    lwd = 3)

plot of chunk unnamed-chunk-2

Run Durbin-Watson Test

library(lmtest)

## Loading required package: zoo

## Attaching package: 'zoo'

## The following object(s) are masked from 'package:base':
## 
## as.Date, as.Date.numeric

# Null Hypothesis: rho = 0; there is only random error and no
# autocorrelation
dwtest(lm2)  # DW = 0.3067, p-value < 2.2e16

## 
##  Durbin-Watson test
## 
## data:  lm2 
## DW = 0.3067, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# This tells us that there is autocorrelation.

Plot Residuals on a Correllelogram to Examine Seasonality of Residuals

acf(lm2$residuals, main = "Correllelogram of lm2 Residuals")  # That is some fine seasonality.

plot of chunk unnamed-chunk-4

Create a time series and plot it using decompose

# Create a time series using ts()
lm2.ts <- ts(mauna$co2, start = min(mauna$year), frequency = 12)
# start= indicates the first observation time unit. start=min(mauna$year)
# indicates that we should start in the minimum year (1969).  frequency=
# indicates the number of observations per unit of time. (In this case we
# have 12.)
head(mauna)

##   year month time   co2 site   cent
## 1 1969     2 1969 324.4    0 -19.42
## 2 1969     3 1969 325.6    0 -19.33
## 3 1969     4 1969 326.7    0 -19.25
## 4 1969     5 1969 327.3    0 -19.17
## 5 1969     6 1970 326.8    0 -19.08
## 6 1969     7 1970 325.9    0 -19.00

# Plot using decompose will separate a time series into trend, seasonal,
# and random parts.

plot(decompose(lm2.ts))

plot of chunk unnamed-chunk-6

Create a Seasonal Factor and Refit Model with Dummy

# Cycle() assigns values for each observation of each time unit (the
# seasonal effect).
season <- cycle(lm2.ts)  # In this case, we're using months in a time unit of years.
head(season)

## [1] 1 2 3 4 5 6

# Compare to the time() function, which treats observations as fractions
# of the time unit
time <- time(lm2.ts)  # In this case,0.083333 for each month
head(time)

## [1] 1969 1969 1969 1969 1969 1969


# Refitting the model with factor(season) as a dummy.
lm3 <- lm(mauna$co2 ~ mauna$cent + I(mauna$cent^2) + factor(season))

Diagnostics on New Model

summary(lm3)  # adj. R^2 = 0.9983

## 
## Call:
## lm(formula = mauna$co2 ~ mauna$cent + I(mauna$cent^2) + factor(season))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8937 -0.5283  0.0676  0.4837  1.7820 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       3.51e+02   1.21e-01 2901.81  < 2e-16 ***
## mauna$cent        1.55e+00   2.96e-03  522.78  < 2e-16 ***
## I(mauna$cent^2)   9.92e-03   2.94e-04   33.69  < 2e-16 ***
## factor(season)2   8.01e-01   1.63e-01    4.92  1.2e-06 ***
## factor(season)3   1.92e+00   1.63e-01   11.83  < 2e-16 ***
## factor(season)4   2.30e+00   1.63e-01   14.14  < 2e-16 ***
## factor(season)5   1.63e+00   1.63e-01   10.00  < 2e-16 ***
## factor(season)6  -2.12e-02   1.63e-01   -0.13      0.9    
## factor(season)7  -2.18e+00   1.63e-01  -13.38  < 2e-16 ***
## factor(season)8  -3.90e+00   1.63e-01  -23.97  < 2e-16 ***
## factor(season)9  -3.94e+00   1.63e-01  -24.25  < 2e-16 ***
## factor(season)10 -2.79e+00   1.63e-01  -17.16  < 2e-16 ***
## factor(season)11 -1.59e+00   1.63e-01   -9.75  < 2e-16 ***
## factor(season)12 -6.69e-01   1.64e-01   -4.09  5.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.718 on 453 degrees of freedom
## Multiple R-squared: 0.998,   Adjusted R-squared: 0.998 
## F-statistic: 2.13e+04 on 13 and 453 DF,  p-value: <2e-16

# Why isn't June significant? I don't know, but the mdoel does fit better.

par(mfrow = c(1, 2))
plot(lm2$residuals ~ lm2$fitted.values, main = "Residuals for Quadratic Model", 
    xlab = "Fitted Values (ppm)", ylab = "Residuals")
plot(lm3$residuals ~ lm3$fitted.values, main = "Residuals for Seasonal Quadratic Model", 
    xlab = "Fitted Values (ppm)", ylab = "Residuals")

plot of chunk unnamed-chunk-9

Honestly, the residuals look more patterned now. Dang.

par(mfrow = c(1, 2))
plot(co2 ~ cent, mauna, main = "CO2 Concentration Quadratic Model", xlab = "Centered Years (1969-2007)", 
    ylab = "CO2 Concentration (ppm)", type = "l")
curve(predict(lm2, data.frame(cent = x), type = "resp"), add = TRUE, col = "red", 
    lwd = 3)
ts.plot(cbind(lm2.ts, lm3$fitted), lty = 1:2, col = c(1, 3), lwd = c(1, 2), 
    main = "CO2 Concentration Seasonal Quadratic Model", ylab = "CO2 Concentration (ppm)", 
    xlab = "Years")

plot of chunk unnamed-chunk-10

Run Durbin-Watson Test

library(lmtest)
# Null Hypothesis: rho = 0; there is only random error and no
# autocorrelation
dwtest(lm3)  # DW = 0.1761, p-value < 2.2e16

## 
##  Durbin-Watson test
## 
## data:  lm3 
## DW = 0.1761, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# This tells us that there is *still* autocorrelation.

Plot Residuals on a Correllelogram to Examine Seasonality of Residuals

par(mfrow = c(1, 2))
acf(lm2$residuals, main = "Correllelogram of lm2 Residuals")
acf(lm3$residuals, main = "Correllelogram of lm3 Residuals")

plot of chunk unnamed-chunk-12

There is still autocorrelation, but we've taken care of seasonality.

Cochrane Orcutt

# The Cochrane-Orcutt Procedure attempts to correct for the
# autocorrelation.  It also returns a value for rho.  Note that to read
# the output, you just type the object name; don't use the summary()
# function.
library(orcutt)
lm4 <- cochrane.orcutt(lm2)
lm4  # Gives a rho of 0.8466136; the new model has an adj. R^2 of 0.9995

## $Cochrane.Orcutt
## 
## Call:
## lm(formula = YB ~ XB - 1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.732 -0.898  0.516  0.884  2.175 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## XB(Intercept) 3.50e+02   5.34e-01  655.21   <2e-16 ***
## XBcent        1.54e+00   3.20e-02   48.28   <2e-16 ***
## XBI(cent^2)   9.77e-03   3.18e-03    3.08   0.0022 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 1.18 on 463 degrees of freedom
## Multiple R-squared:    1,    Adjusted R-squared:    1 
## F-statistic: 3.25e+05 on 3 and 463 DF,  p-value: <2e-16 
## 
## 
## $rho
## [1] 0.8466
## 
## $number.interaction
## [1] 1