Autoregression

Autocorrelation

ESCI 597A

dygraphs

Before we start, take a look at this cool widget that I’ve been playing with here

set.seed(3624) # so we can repeat with the same random seed. You can set this to any number you like
n <- 500
epsilon <- rnorm(n = n,mean = 0,sd = 1) # white noise
x <- numeric(length = n)
phi <- 0.5
for(i in 2:n){
  x[i] <- phi * x[i-1] + epsilon[i]  
}
plot(x,type="l")

xts=ts(x,1)

#Define Line Names
legendnames=c("50-Year Moving Average", "20-Year Moving Average", "10-Year Moving Average")
#Define color factors
colors=c("blue","purple", "slateblue2")

ma50 <- filter(x, filter=rep(x=1/50,times=50), sides=2)
ma20 <- filter(x, filter=rep(x=1/20,times=20), sides=2)
ma10 <- filter(x, filter=rep(x=1/10,times=10), sides=2)
plot(x, type="l",col="grey")
lines(ma10,col="slateblue2",lwd=2)
lines(ma20,col="purple",lwd=3)
lines(ma50,col="blue",lwd=4)
legend("topleft", legend=legendnames, lwd=2,
       col=colors)

x.acf=acf(x)

str(x.acf)

## List of 6
##  $ acf   : num [1:27, 1, 1] 1 0.4897 0.2239 0.1208 0.0683 ...
##  $ type  : chr "correlation"
##  $ n.used: int 500
##  $ lag   : num [1:27, 1, 1] 0 1 2 3 4 5 6 7 8 9 ...
##  $ series: chr "x"
##  $ snames: NULL
##  - attr(*, "class")= chr "acf"

x.acf$acf

## , , 1
## 
##               [,1]
##  [1,]  1.000000000
##  [2,]  0.489682560
##  [3,]  0.223923459
##  [4,]  0.120814866
##  [5,]  0.068332266
##  [6,]  0.045694978
##  [7,]  0.039722672
##  [8,]  0.022754450
##  [9,]  0.020711211
## [10,]  0.024123484
## [11,]  0.041485217
## [12,]  0.057001420
## [13,] -0.010827702
## [14,] -0.020130046
## [15,]  0.003225389
## [16,] -0.048017434
## [17,] -0.056533023
## [18,] -0.002953526
## [19,] -0.003203477
## [20,]  0.053763036
## [21,]  0.045995155
## [22,]  0.009648256
## [23,] -0.022369129
## [24,] -0.040878457
## [25,] -0.026795658
## [26,] -0.054132417
## [27,] -0.032842159

x.pacf=pacf(x)

x.pacf

## 
## Partial autocorrelations of series 'x', by lag
## 
##      1      2      3      4      5      6      7      8      9     10 
##  0.490 -0.021  0.025  0.004  0.011  0.015 -0.007  0.011  0.011  0.030 
##     11     12     13     14     15     16     17     18     19     20 
##  0.029 -0.074  0.008  0.021 -0.074 -0.010  0.050 -0.017  0.077 -0.014 
##     21     22     23     24     25     26 
## -0.025 -0.028 -0.018  0.007 -0.056  0.035

data(nhtemp)
layout(matrix(c(1,1,2,3),ncol=2,nrow=2,byrow=T))
plot(nhtemp,type="b",ylab=expression(degree * F))
acf(nhtemp,main="")
pacf(nhtemp,main="")

Re-building variables from last time.

Analysis in R

First, we will read in the file from the *.csv

#setwd("U:/ESCI597/")
#CRLA.SNOTEL=read.csv("CRLA_SNOTEL_slim_corrected.csv", header=T, stringsAsFactors=F)
setwd("~/Desktop/WWU/ESCI597/TimeSeries")
CRLA.SNOTEL=read.csv("CRLA_SNOTEL_slim_corrected.csv", header=T, stringsAsFactors=F)

Crater lake SNOTEL site has been in service from (note that PPT values do not begin for the first two weeks)

head(CRLA.SNOTEL)

##      Date SWE PPTacc Tmax Tmin Tavg PPTinc
## 1 10/1/00   0    0.0   57   40   50    0.1
## 2 10/2/00   0    0.1   59   31   43    0.0
## 3 10/3/00   0    0.1   62   28   43    0.0
## 4 10/4/00   0    0.1   61   34   46    0.0
## 5 10/5/00   0    0.1   62   29   45    0.0
## 6 10/6/00   0    0.1   65   35   48    0.1

tail(CRLA.SNOTEL)

##         Date  SWE PPTacc Tmax Tmin Tavg PPTinc
## 5291 3/27/15 11.7   46.8   61   32   45    0.0
## 5292 3/28/15 11.4   46.8   50   26   36    0.1
## 5293 3/29/15 11.3   46.9   58   29   42    0.0
## 5294 3/30/15 11.2   46.9   57   26   42    0.0
## 5295 3/31/15 11.0   46.9   35   26   31    0.5
## 5296  4/1/15 11.5   47.4   37   19   28    0.0

The variables of interest are:

names(CRLA.SNOTEL)

## [1] "Date"   "SWE"    "PPTacc" "Tmax"   "Tmin"   "Tavg"   "PPTinc"

which refer to the Date, SWE (Snow Water Equivalent - inches), PPTacc (accumulated precipitation for the snow year, which begins October 1 - inches), Tmax (Daily Maximum Temperature - F), Tmin (Daily minimum temperature - F), Tavg (daily average temperture - F), and PPTinc (daily precipitation increments - inches)

Summary Statistics

Due to the different types of measurement here, we exclude the accumulated precipitation because it abruptly ends on October 1 when it re-sets.

library(moments)

for (ctr in c(2,4:7)){

kt <-kurtosis(CRLA.SNOTEL[[ctr]])
sk <- skewness(CRLA.SNOTEL[[ctr]])
my.xlim <- range(CRLA.SNOTEL[[ctr]])
h<-hist(CRLA.SNOTEL[[ctr]], breaks=10, col="lightblue", xlab=names(CRLA.SNOTEL)[ctr],
        main="",xlim=my.xlim) 
xfit<-seq(min(CRLA.SNOTEL[[ctr]]),max(CRLA.SNOTEL[[ctr]]),length=100) 
yfit<-dnorm(xfit,mean=mean(CRLA.SNOTEL[[ctr]]),sd=sd(CRLA.SNOTEL[[ctr]])) 
yfit <- yfit*diff(h$mids[1:2])*length(CRLA.SNOTEL[[ctr]]) 
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(CRLA.SNOTEL[[ctr]], horizontal=TRUE, add = TRUE, outline=TRUE,  axes=FALSE, ylim=my.xlim, col = "lightgreen",  boxwex=100)
#text(x = 5, y=1000, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
#text(x = 5, y=950, labels = paste("Skewness=",round(sk,2)),pos = 4)
legend("topleft", paste("Kurtosis=",round(kt,2),"
Skewness=",round(sk,2)))
}

We can learly see that the incremental precipitation is non-normal. Typically precipitatin data are log-normal, and these measurements may be transofrmed for further research.

Time Series

For our next few steps we will need the data in a Time Series format.

CRLA.SWEts <- ts(CRLA.SNOTEL[2],frequency = 1)
CRLA.PPTaccts <- ts(CRLA.SNOTEL[3],frequency = 1)
CRLA.Tmaxts <- ts(CRLA.SNOTEL[4],frequency = 1)
CRLA.Tmints <- ts(CRLA.SNOTEL[5],frequency = 1)
CRLA.Tavgts <- ts(CRLA.SNOTEL[6],frequency = 1)
CRLA.PPTincts <- ts(CRLA.SNOTEL[7],frequency = 1)

CRLA.time=time(CRLA.SWEts)

Decomposition

My data are failing to decompose, as is evidenced by the cycle(CRLA.Tavgts) function returning all “1”s. I will have to ask Andy about this. Apparently this was due to an import error, using the wrong frequency.

CRLA.Tavgts365 <- ts(CRLA.SNOTEL[6],frequency = 365, start=274/365) #Start date is 10/1/2000 or Day Of Year 274
plot(decompose(CRLA.Tavgts365))

Decomposition of Tavg. Here time is in years from 10/1/2000.

Using the lubridate and zoo packages to match dates with readings

# lubridate - handles dates very nicely, we use the function parse_date_time() 
# to import the data (arg1) and the date format (arg2)
# install.packages("lubridate")

# zoo is Zotar's Ordered Objeczoo so we the create the CRLA.Tmax object in the zoo class form
# This makes it easy to plot the data with the appropriate labesl and handles leap years
# install.packages("zoo")

library(lubridate)
library(zoo)

We will create the lubridate object “foo” by reading in the snotel dates (which are of type string). This is basically turning the test dates in the .csv into a usable format for analysis. This also allows us to plot the dates along the x-axis easily. Technically this is a POSIXct date-time object. for more see http://www.inside-r.org/packages/cran/lubridate/docs/parse_date_time

foo <- parse_date_time(CRLA.SNOTEL$Date, "m/d/y")

Using the order.by() argument we assign these lubridate POSIXct objects to measurements for eachvariable of interest:

CRLA.Datezoo <- zoo(CRLA.SNOTEL[1],order.by = foo)
CRLA.SWEzoo <- zoo(CRLA.SNOTEL[2],order.by = foo)
CRLA.PPTacczoo <- zoo(CRLA.SNOTEL[3],order.by = foo)
CRLA.Tmaxzoo <- zoo(CRLA.SNOTEL[4],order.by = foo)
CRLA.Tminzoo <- zoo(CRLA.SNOTEL[5],order.by = foo)
CRLA.Tavgzoo <- zoo(CRLA.SNOTEL[6],order.by = foo)
CRLA.PPTinczoo <- zoo(CRLA.SNOTEL[7],order.by = foo)

This then allows us to plot the measurements nicely:

plot(CRLA.PPTacczoo)

plot(CRLA.SWEzoo)

plot(CRLA.Tmaxzoo)

plot(CRLA.Tminzoo)

plot(CRLA.Tavgzoo)

plot(CRLA.PPTinczoo)

Smoothing

Of course these plots are fairly noisy at this scale. To view them better, let’s add smoothed lines using the filter() function:

SWEma365 <- filter(x=CRLA.SWEzoo, filter=rep(x=1/365,times=365), sides=2)
SWEma30 <- filter(x=CRLA.SWEzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.SWEzoo,col="grey")
lines(SWEma365,col="red",lwd=2)
lines(SWEma30,col="blue",lwd=2)

Tmaxma365 <- filter(x=CRLA.Tmaxzoo, filter=rep(x=1/365,times=365), sides=2)
Tmaxma30 <- filter(x=CRLA.Tmaxzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tmaxzoo,col="grey")
lines(Tmaxma365,col="red",lwd=2)
lines(Tmaxma30,col="blue",lwd=2)

Tminma365 <- filter(x=CRLA.Tminzoo, filter=rep(x=1/365,times=365), sides=2)
Tminma30 <- filter(x=CRLA.Tminzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tminzoo,col="grey")
lines(Tminma365,col="red",lwd=2)
lines(Tminma30,col="blue",lwd=2)

Tavgma365 <- filter(x=CRLA.Tavgzoo, filter=rep(x=1/365,times=365), sides=2)
Tavgma30 <- filter(x=CRLA.Tavgzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tavgzoo,col="grey")
lines(Tavgma365,col="red",lwd=2)
lines(Tavgma30,col="blue",lwd=2)

PPTaccma365 <- filter(x=CRLA.PPTacczoo, filter=rep(x=1/365,times=365), sides=2)
PPTaccma30 <- filter(x=CRLA.PPTacczoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.PPTacczoo,col="grey")
lines(PPTaccma365,col="red",lwd=2)
lines(PPTaccma30,col="blue",lwd=2)

PPTincma365 <- filter(x=CRLA.PPTinczoo, filter=rep(x=1/365,times=365), sides=2)
PPTincma30 <- filter(x=CRLA.PPTinczoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.PPTinczoo,col="grey")
lines(PPTincma365,col="red",lwd=2)
lines(PPTincma30,col="blue",lwd=2)

Trend Lines

Trend lines in R are created using the linear model function lm()

CRLA.time=time(CRLA.Tavgts)

CRLA.TavgLM=lm(CRLA.Tavgts~CRLA.time)
plot(CRLA.Tavgts,col="grey")
# Heck, let's add the trend line from our linear model! 
abline(CRLA.TavgLM, col="black",lwd=2, lty="dashed") # So satisfying! - Thanks Andy

legend("topleft", paste("Slope (?F per year) =",round(CRLA.TavgLM$coefficients[[2]]*365, 3)))

Autocorrelation

acf(CRLA.Tavgts)

hydroTSM Package

While there are many cool things to do with autocorrelation, I first wanted to process my data from daily to monthly and annual for better observation of the autocorrelation functions. Using the hydroTSM package.

#hydroTSM package

#install.packages("hydroTSM")
#install.packages("sp")

#I think this packages manipulates data.frame objects using some sweet functions like:
#subdaily2daily()
#daily2annual()
#etc.

library(hydroTSM)

## Loading required package: xts

library(sp)

## Using hydroplot to form summary statistics from CRLA.SNOTEL zoo data

## To make these plots better, I should clean up the data to remove partial years (which throw off the annual mean values)

hydroplot(CRLA.Tavgzoo, FUN=mean, ylab= "Tavg", var.unit = "?F")

hydroplot(CRLA.Tmaxzoo, FUN=mean, ylab= "Tmax", var.unit = "?F")

hydroplot(CRLA.Tminzoo, FUN=mean, ylab= "Tmin", var.unit = "?F")

hydroplot(CRLA.SWEzoo, FUN=mean, ylab= "SWE", var.unit = "Inches")

hydroplot(CRLA.PPTinczoo, FUN=mean, ylab= "PPTinc", var.unit = "Inches")

hydroplot(CRLA.PPTacczoo, FUN=mean, ylab= "PPTacc", var.unit = "Inches")

#CRLA.Tmaxannual=daily2annual(CRLA.Tmaxzoo)

## CRLA daily2annual

#CRLA.Tmaxannual=daily2annual(CRLA.Tmaxzoo, FUN=mean, na.rm = TRUE, date.fmt="%Y-%m-%d", out.fmt = "%Y")

## Yet another time series object type!
## Use xts() and bring the dates in from a string using as.Date()
CRLA.Date=as.Date(CRLA.SNOTEL$Date, "%m/%d/%Y")
# Do them individually
CRLA.Tmaxxts=xts(CRLA.SNOTEL$Tmax, order.by=CRLA.Date)
head(CRLA.Tmaxxts)

##            [,1]
## 0000-10-01   57
## 0000-10-02   59
## 0000-10-03   62
## 0000-10-04   61
## 0000-10-05   62
## 0000-10-06   65

# Do it as a matrix?
CRLA.xts=xts(CRLA.SNOTEL[,2:7], order.by=CRLA.Date)


# Sub-daily to annual ts
CRLA.Tmaxannual=daily2annual(CRLA.Tmaxxts, FUN=mean, na.rm=TRUE)
CRLA.Tmaxmonthly=daily2monthly(CRLA.Tmaxxts, FUN=mean, na.rm=TRUE)
plot(CRLA.Tmaxannual)

plot(CRLA.Tmaxmonthly)

# Do it as a matrix!
CRLA.annual=daily2annual(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.annual)

CRLA.monthly=daily2monthly(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.monthly)

## Let's look at PPTinc individually and use the sum function
CRLA.PPTincxts=xts(CRLA.SNOTEL$PPTinc, order.by=CRLA.Date)
CRLA.PPTincannual=daily2annual(CRLA.PPTincxts, FUN=sum, na.rm=TRUE)
CRLA.PPTincmonthly=daily2monthly(CRLA.PPTincxts, FUN=sum, na.rm=TRUE)
plot(CRLA.PPTincannual)

plot(CRLA.PPTincmonthly)

CRLA.monthlyacf=acf(coredata(CRLA.monthly))

str(CRLA.monthlyacf)

## List of 6
##  $ acf   : num [1:15, 1:6, 1:6] 1 0.8223 0.4521 0.0605 -0.279 ...
##  $ type  : chr "correlation"
##  $ n.used: int 175
##  $ lag   : num [1:15, 1:6, 1:6] 0 1 2 3 4 5 6 7 8 9 ...
##  $ series: chr "coredata(CRLA.monthly)"
##  $ snames: chr [1:6] "SWE" "PPTacc" "Tmax" "Tmin" ...
##  - attr(*, "class")= chr "acf"

Autoregression

Donal O’Leary

Monday, April 13, 2015

Autocorrelation

dygraphs

Re-building variables from last time.

Analysis in R

Summary Statistics

Time Series

Decomposition

Using the lubridate and zoo packages to match dates with readings

Smoothing

Trend Lines

Autocorrelation

hydroTSM Package