These notes are written as supplementing material for Western Washington University’s ESCI597A Time Series and Spatial Analysis with Dr. Andy Bunn. Many examples are attributed to him here
If your time series fits an ar(1) model, meaing that the correlation at is correlated to t-1, fit that model and you can use a predict function to plot forecasted points with standard error lines around that prediction.
rm(list=ls())
data(sunspot.year)
sunspot.ar <- ar(sunspot.year)
sunspot.ar
##
## Call:
## ar(x = sunspot.year)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 1.1305 -0.3524 -0.1745 0.1403 -0.1358 0.0963 -0.0556 0.0076
## 9
## 0.1941
##
## Order selected 9 sigma^2 estimated as 267.5
sunspot.forecast <- predict(sunspot.ar, n.ahead = 27)
plot(sunspot.year,xlim=c(1700, 2015),col="grey",lwd=1.5, ylab="Sunspots (n)")
lines(sunspot.forecast$pred,col="green",lwd=1.5)
upper <- sunspot.forecast$pred + sunspot.forecast$se
lower <- sunspot.forecast$pred - sunspot.forecast$se
xx <- c(time(sunspot.forecast$pred), rev(time(sunspot.forecast$pred)))
yy <- c(upper,rev(lower))
plot(sunspot.year,xlim=c(1970, 2015),col="grey",lwd=1.5, type="b",pch=20,
ylab="Sunspots (n)")
polygon(xx,yy,col="lightgreen",border = NA)
points(sunspot.forecast$pred,col="darkgreen",lwd=1.5,pch=20)
lines(sunspot.forecast$pred,col="darkgreen",lwd=1.5)
n <- 250
x <- ts(rnorm(n))
epsilon <- arima.sim(model = list(ar=0.9),n = n)
y <- x + epsilon
# step 1 - the regular ols regression
ols1 <- lm(y~x)
# step 2 - inspect the residuals
acf(residuals(ols1))
pacf(residuals(ols1)) # looks like an AR1
plot(epsilon)
# step 3 - get a good model
ar1 <- coef(arima(residuals(ols1), order = c(1,0,0), include.mean = FALSE)) #AR(1) for residuals
ar1
## ar1
## 0.8325414
# step 4 - adjust using a gls with cor structure
library(nlme)
gls1 <- gls(y~x, correlation = corAR1(ar1)) #give an init value
# note you can run a gls that is equiv to ols via: gls(y~x)
# You would do this by not including a correlationmatrix.
# Compare the models
ols1.summary <- summary(ols1)
gls1.summary <- summary(gls1)
ols1.summary
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7605 -1.1430 0.0782 1.2854 4.0752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6369 0.1085 -5.869 1.40e-08 ***
## x 0.9596 0.1093 8.783 2.67e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.709 on 248 degrees of freedom
## Multiple R-squared: 0.2373, Adjusted R-squared: 0.2342
## F-statistic: 77.14 on 1 and 248 DF, p-value: 2.667e-16
gls1.summary
## Generalized least squares fit by REML
## Model: y ~ x
## Data: NULL
## AIC BIC logLik
## 688.7482 702.8019 -340.3741
##
## Correlation Structure: AR(1)
## Formula: ~1
## Parameter estimate(s):
## Phi
## 0.83981
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -0.6505294 0.3623920 -1.795099 0.0739
## x 0.9979114 0.0425272 23.465250 0.0000
##
## Correlation:
## (Intr)
## x -0.012
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.74769016 -0.66354960 0.07268208 0.74591418 2.36561271
##
## Residual standard error: 1.725714
## Degrees of freedom: 250 total; 248 residual
# geeky plot of the coefs and their se
# getting the coefs from the models is easy
# getting the se out is a bit more involved
#bhat
bhat.ols1 <- coef(ols1)[2]
bhat.gls1 <- coef(gls1)[2]
#se of bhat
bhat.se.ols1 <- ols1.summary$coef[[4]]
bhat.se.gls1 <- gls1.summary$tTable[2,2]
require(ggplot2)
## Loading required package: ggplot2
dat <- data.frame(bhat = c(bhat.ols1,bhat.gls1),
bhat.se = c(bhat.se.ols1,bhat.se.gls1),
model = c("OLS","GLS"))
my.ylim <- c(min(range(dat$bhat-dat$bhat.se))-0.1,
max(range(dat$bhat+dat$bhat.se))+0.1)
ggplot(dat, aes(y = bhat, x = model)) +
geom_point(size = 5) +
ylab(expression(hat(beta))) + xlab("Model") +
geom_errorbar(aes(ymin=bhat-bhat.se, ymax=bhat+bhat.se), width=0.2) +
ylim(my.ylim) +
theme(text = element_text(size=14))
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(hydroTSM)
setwd("/Users/bgatts/Desktop/WWU/ESCI597/TimeSeries")
CRLA.SNOTEL=read.csv("CRLA_SNOTEL_slim_corrected.csv", header=T, stringsAsFactors=F)
CRLA.Date=as.Date(CRLA.SNOTEL$Date, "%m/%d/%Y")
# Do it as a matrix?
CRLA.xts=xts(CRLA.SNOTEL[,2:7], order.by=CRLA.Date)
# Do it as a matrix!
CRLA.annual=daily2annual(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.annual)
CRLA.monthly=daily2monthly(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.monthly)
##
CRLA.monthlyacf=acf(coredata(CRLA.monthly))
# Call variables individually below
# or
# Call variables through matrix CRLA.monthly
CRLA.Tminacf=acf(coredata(CRLA.monthly[,4]))
CRLA.Tavgacf=acf(coredata(CRLA.monthly[,5]))
# or
# using $ commands
CRLA.SWEacf=acf(coredata(CRLA.monthly$SWE))
CRLA.PPTincacf=acf(coredata(CRLA.monthly[,6]))
CRLA.PPTaccacf=acf(coredata(CRLA.monthly[,2]))
##Build Forecasting Model here
CRLA.monthly.Tavg.ts=ts(CRLA.monthly$Tavg)
CRLA.monthlytavg.ar=ar(CRLA.monthly.Tavg.ts)
CRLA.monthlytavg.ar
##
## Call:
## ar(x = CRLA.monthly.Tavg.ts)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.5640 0.1495 -0.2673 -0.1366 -0.0310 0.0536 -0.1657 -0.0732
## 9 10 11 12 13
## -0.1009 -0.0124 0.2551 0.2020 -0.2579
##
## Order selected 13 sigma^2 estimated as 11.47
CRLA.monthlytavg.forecast=predict(CRLA.monthlytavg.ar, n.ahead = 24)
plot(CRLA.monthly.Tavg.ts,xlim=c(0,200),col="grey",lwd=1.5, ylab="CRLA Tavg")
lines(CRLA.monthlytavg.forecast$pred,col="green",lwd=1.5)
Time above is in Months.
sun.train <- window(sunspot.year, end = 1933) # end of solar cycle 16 (cycles since 1755)
sun.test <- window(sunspot.year, start = 1933) # start of solar cycle 17
plot(sunspot.year,col="grey",ylab="Sunspots (n)")
lines(sun.train,col="red",lty="dotted",lwd=2)
lines(sun.test,col="blue",lty="dotted",lwd=2)
load("/Users/bgatts/Desktop/WWU/ESCI597/TimeSeries/Foreacsting/ESCI597/treeClim.RData")
treeClim.ts=ts(treeClim$treeGrowth, start=1896, frequency=1)
treeClim.tempts=ts(treeClim$minTemp, start=1896, frequency=1)
# step 1 - the regular ols regression
treeClim.ols=lm(treeClim.ts~treeClim[[1]])
# step 2 - inspect the residuals
acf(residuals(treeClim.ols))
pacf(residuals(treeClim.ols)) # looks like an AR2
# step 3 - get a good model
ar2 <- coef(arima(residuals(treeClim.ols), order = c(2,0,0), include.mean = FALSE)) #AR(2) for residuals
ar2
## ar1 ar2
## 0.1557606 0.2000382
# step 4 - adjust using a gls with cor structure
library(nlme)
treeClim.gls <- gls(treeClim.ts~treeClim.tempts, correlation = corAR1(ar2)) #give an init value
## Warning in if (abs(value) >= 1) {: the condition has length > 1 and only
## the first element will be used
# This function calls the correlation an AR1, is there a way to reference an AR2 in this line?
# Compare the models
treeClim.ols.summary <- summary(treeClim.ols)
treeClim.gls.summary <- summary(treeClim.gls)
treeClim.ols.summary
##
## Call:
## lm(formula = treeClim.ts ~ treeClim[[1]])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36365 -0.10279 -0.01106 0.11337 0.39611
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.6905536 0.9037781 -4.083 8.44e-05 ***
## treeClim[[1]] 0.0024813 0.0004631 5.358 4.66e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1584 on 110 degrees of freedom
## Multiple R-squared: 0.207, Adjusted R-squared: 0.1998
## F-statistic: 28.71 on 1 and 110 DF, p-value: 4.663e-07
treeClim.gls.summary
## Generalized least squares fit by REML
## Model: treeClim.ts ~ treeClim.tempts
## Data: NULL
## AIC BIC logLik
## -171.167 -157.6646 90.58349
##
## Correlation Structure: AR(1)
## Formula: ~1
## Parameter estimate(s):
## Phi <NA>
## 0.1448444 0.2000382
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 2.5387474 0.10058646 25.23945 0
## treeClim.tempts 0.3310184 0.02384847 13.88007 0
##
## Correlation:
## (Intr)
## treeClim.tempts 0.994
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.26730065 -0.58196021 -0.05727082 0.72633077 2.46950661
##
## Residual standard error: 0.1038058
## Degrees of freedom: 112 total; 110 residual
# geeky plot of the coefs and their se
# getting the coefs from the models is easy
# getting the se out is a bit more involved
#bhat
bhat.treeClim.ols <- coef(treeClim.ols)[2]
bhat.treeClim.gls <- coef(treeClim.gls)[2]
#se of bhat
bhat.se.treeClim.ols <- treeClim.ols.summary$coef[[4]]
bhat.se.treeClim.gls <- treeClim.gls.summary$tTable[2,2]
require(ggplot2)
dat <- data.frame(bhat = c(bhat.treeClim.ols,bhat.treeClim.gls),
bhat.se = c(bhat.se.treeClim.ols,bhat.se.treeClim.gls),
model = c("OLS","GLS"))
my.ylim <- c(min(range(dat$bhat-dat$bhat.se))-0.1,
max(range(dat$bhat+dat$bhat.se))+0.1)
ggplot(dat, aes(y = bhat, x = model)) +
geom_point(size = 5) +
ylab(expression(hat(beta))) + xlab("Model") +
geom_errorbar(aes(ymin=bhat-bhat.se, ymax=bhat+bhat.se), width=0.2) +
ylim(my.ylim) +
theme(text = element_text(size=14))
It is possible that my snow data are spatially and temporally autocorrelated. I will need to model this autocorrelation and clean the residuals (by region? fire regime?). Look in the book for a walkthrough.
Parhaps using a GLS model would be appropriate.