ESCI 597A
Before we start, take a look at this cool widget that I’ve been playing with here
set.seed(3624) # so we can repeat with the same random seed. You can set this to any number you like
n <- 500
epsilon <- rnorm(n = n,mean = 0,sd = 1) # white noise
x <- numeric(length = n)
phi <- 0.5
for(i in 2:n){
x[i] <- phi * x[i-1] + epsilon[i]
}
plot(x,type="l")
xts=ts(x,1)
#Define Line Names
legendnames=c("50-Year Moving Average", "20-Year Moving Average", "10-Year Moving Average")
#Define color factors
colors=c("blue","purple", "slateblue2")
ma50 <- filter(x, filter=rep(x=1/50,times=50), sides=2)
ma20 <- filter(x, filter=rep(x=1/20,times=20), sides=2)
ma10 <- filter(x, filter=rep(x=1/10,times=10), sides=2)
plot(x, type="l",col="grey")
lines(ma10,col="slateblue2",lwd=2)
lines(ma20,col="purple",lwd=3)
lines(ma50,col="blue",lwd=4)
legend("topleft", legend=legendnames, lwd=2,
col=colors)
x.acf=acf(x)
str(x.acf)
## List of 6
## $ acf : num [1:27, 1, 1] 1 0.4897 0.2239 0.1208 0.0683 ...
## $ type : chr "correlation"
## $ n.used: int 500
## $ lag : num [1:27, 1, 1] 0 1 2 3 4 5 6 7 8 9 ...
## $ series: chr "x"
## $ snames: NULL
## - attr(*, "class")= chr "acf"
x.acf$acf
## , , 1
##
## [,1]
## [1,] 1.000000000
## [2,] 0.489682560
## [3,] 0.223923459
## [4,] 0.120814866
## [5,] 0.068332266
## [6,] 0.045694978
## [7,] 0.039722672
## [8,] 0.022754450
## [9,] 0.020711211
## [10,] 0.024123484
## [11,] 0.041485217
## [12,] 0.057001420
## [13,] -0.010827702
## [14,] -0.020130046
## [15,] 0.003225389
## [16,] -0.048017434
## [17,] -0.056533023
## [18,] -0.002953526
## [19,] -0.003203477
## [20,] 0.053763036
## [21,] 0.045995155
## [22,] 0.009648256
## [23,] -0.022369129
## [24,] -0.040878457
## [25,] -0.026795658
## [26,] -0.054132417
## [27,] -0.032842159
x.pacf=pacf(x)
x.pacf
##
## Partial autocorrelations of series 'x', by lag
##
## 1 2 3 4 5 6 7 8 9 10
## 0.490 -0.021 0.025 0.004 0.011 0.015 -0.007 0.011 0.011 0.030
## 11 12 13 14 15 16 17 18 19 20
## 0.029 -0.074 0.008 0.021 -0.074 -0.010 0.050 -0.017 0.077 -0.014
## 21 22 23 24 25 26
## -0.025 -0.028 -0.018 0.007 -0.056 0.035
data(nhtemp)
layout(matrix(c(1,1,2,3),ncol=2,nrow=2,byrow=T))
plot(nhtemp,type="b",ylab=expression(degree * F))
acf(nhtemp,main="")
pacf(nhtemp,main="")
First, we will read in the file from the *.csv
#setwd("U:/ESCI597/")
#CRLA.SNOTEL=read.csv("CRLA_SNOTEL_slim_corrected.csv", header=T, stringsAsFactors=F)
setwd("~/Desktop/WWU/ESCI597/TimeSeries")
CRLA.SNOTEL=read.csv("CRLA_SNOTEL_slim_corrected.csv", header=T, stringsAsFactors=F)
Crater lake SNOTEL site has been in service from (note that PPT values do not begin for the first two weeks)
head(CRLA.SNOTEL)
## Date SWE PPTacc Tmax Tmin Tavg PPTinc
## 1 10/1/00 0 0.0 57 40 50 0.1
## 2 10/2/00 0 0.1 59 31 43 0.0
## 3 10/3/00 0 0.1 62 28 43 0.0
## 4 10/4/00 0 0.1 61 34 46 0.0
## 5 10/5/00 0 0.1 62 29 45 0.0
## 6 10/6/00 0 0.1 65 35 48 0.1
tail(CRLA.SNOTEL)
## Date SWE PPTacc Tmax Tmin Tavg PPTinc
## 5291 3/27/15 11.7 46.8 61 32 45 0.0
## 5292 3/28/15 11.4 46.8 50 26 36 0.1
## 5293 3/29/15 11.3 46.9 58 29 42 0.0
## 5294 3/30/15 11.2 46.9 57 26 42 0.0
## 5295 3/31/15 11.0 46.9 35 26 31 0.5
## 5296 4/1/15 11.5 47.4 37 19 28 0.0
The variables of interest are:
names(CRLA.SNOTEL)
## [1] "Date" "SWE" "PPTacc" "Tmax" "Tmin" "Tavg" "PPTinc"
which refer to the Date, SWE (Snow Water Equivalent - inches), PPTacc (accumulated precipitation for the snow year, which begins October 1 - inches), Tmax (Daily Maximum Temperature - F), Tmin (Daily minimum temperature - F), Tavg (daily average temperture - F), and PPTinc (daily precipitation increments - inches)
Due to the different types of measurement here, we exclude the accumulated precipitation because it abruptly ends on October 1 when it re-sets.
library(moments)
for (ctr in c(2,4:7)){
kt <-kurtosis(CRLA.SNOTEL[[ctr]])
sk <- skewness(CRLA.SNOTEL[[ctr]])
my.xlim <- range(CRLA.SNOTEL[[ctr]])
h<-hist(CRLA.SNOTEL[[ctr]], breaks=10, col="lightblue", xlab=names(CRLA.SNOTEL)[ctr],
main="",xlim=my.xlim)
xfit<-seq(min(CRLA.SNOTEL[[ctr]]),max(CRLA.SNOTEL[[ctr]]),length=100)
yfit<-dnorm(xfit,mean=mean(CRLA.SNOTEL[[ctr]]),sd=sd(CRLA.SNOTEL[[ctr]]))
yfit <- yfit*diff(h$mids[1:2])*length(CRLA.SNOTEL[[ctr]])
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(CRLA.SNOTEL[[ctr]], horizontal=TRUE, add = TRUE, outline=TRUE, axes=FALSE, ylim=my.xlim, col = "lightgreen", boxwex=100)
#text(x = 5, y=1000, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
#text(x = 5, y=950, labels = paste("Skewness=",round(sk,2)),pos = 4)
legend("topleft", paste("Kurtosis=",round(kt,2),"
Skewness=",round(sk,2)))
}
We can learly see that the incremental precipitation is non-normal. Typically precipitatin data are log-normal, and these measurements may be transofrmed for further research.
For our next few steps we will need the data in a Time Series format.
CRLA.SWEts <- ts(CRLA.SNOTEL[2],frequency = 1)
CRLA.PPTaccts <- ts(CRLA.SNOTEL[3],frequency = 1)
CRLA.Tmaxts <- ts(CRLA.SNOTEL[4],frequency = 1)
CRLA.Tmints <- ts(CRLA.SNOTEL[5],frequency = 1)
CRLA.Tavgts <- ts(CRLA.SNOTEL[6],frequency = 1)
CRLA.PPTincts <- ts(CRLA.SNOTEL[7],frequency = 1)
CRLA.time=time(CRLA.SWEts)
My data are failing to decompose, as is evidenced by the cycle(CRLA.Tavgts) function returning all “1”s. I will have to ask Andy about this. Apparently this was due to an import error, using the wrong frequency.
CRLA.Tavgts365 <- ts(CRLA.SNOTEL[6],frequency = 365, start=274/365) #Start date is 10/1/2000 or Day Of Year 274
plot(decompose(CRLA.Tavgts365))
Decomposition of Tavg. Here time is in years from 10/1/2000.
# lubridate - handles dates very nicely, we use the function parse_date_time()
# to import the data (arg1) and the date format (arg2)
# install.packages("lubridate")
# zoo is Zotar's Ordered Objeczoo so we the create the CRLA.Tmax object in the zoo class form
# This makes it easy to plot the data with the appropriate labesl and handles leap years
# install.packages("zoo")
library(lubridate)
library(zoo)
We will create the lubridate object “foo” by reading in the snotel dates (which are of type string). This is basically turning the test dates in the .csv into a usable format for analysis. This also allows us to plot the dates along the x-axis easily. Technically this is a POSIXct date-time object. for more see http://www.inside-r.org/packages/cran/lubridate/docs/parse_date_time
foo <- parse_date_time(CRLA.SNOTEL$Date, "m/d/y")
Using the order.by() argument we assign these lubridate POSIXct objects to measurements for eachvariable of interest:
CRLA.Datezoo <- zoo(CRLA.SNOTEL[1],order.by = foo)
CRLA.SWEzoo <- zoo(CRLA.SNOTEL[2],order.by = foo)
CRLA.PPTacczoo <- zoo(CRLA.SNOTEL[3],order.by = foo)
CRLA.Tmaxzoo <- zoo(CRLA.SNOTEL[4],order.by = foo)
CRLA.Tminzoo <- zoo(CRLA.SNOTEL[5],order.by = foo)
CRLA.Tavgzoo <- zoo(CRLA.SNOTEL[6],order.by = foo)
CRLA.PPTinczoo <- zoo(CRLA.SNOTEL[7],order.by = foo)
This then allows us to plot the measurements nicely:
plot(CRLA.PPTacczoo)
plot(CRLA.SWEzoo)
plot(CRLA.Tmaxzoo)
plot(CRLA.Tminzoo)
plot(CRLA.Tavgzoo)
plot(CRLA.PPTinczoo)
Of course these plots are fairly noisy at this scale. To view them better, let’s add smoothed lines using the filter() function:
SWEma365 <- filter(x=CRLA.SWEzoo, filter=rep(x=1/365,times=365), sides=2)
SWEma30 <- filter(x=CRLA.SWEzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.SWEzoo,col="grey")
lines(SWEma365,col="red",lwd=2)
lines(SWEma30,col="blue",lwd=2)
Tmaxma365 <- filter(x=CRLA.Tmaxzoo, filter=rep(x=1/365,times=365), sides=2)
Tmaxma30 <- filter(x=CRLA.Tmaxzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tmaxzoo,col="grey")
lines(Tmaxma365,col="red",lwd=2)
lines(Tmaxma30,col="blue",lwd=2)
Tminma365 <- filter(x=CRLA.Tminzoo, filter=rep(x=1/365,times=365), sides=2)
Tminma30 <- filter(x=CRLA.Tminzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tminzoo,col="grey")
lines(Tminma365,col="red",lwd=2)
lines(Tminma30,col="blue",lwd=2)
Tavgma365 <- filter(x=CRLA.Tavgzoo, filter=rep(x=1/365,times=365), sides=2)
Tavgma30 <- filter(x=CRLA.Tavgzoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.Tavgzoo,col="grey")
lines(Tavgma365,col="red",lwd=2)
lines(Tavgma30,col="blue",lwd=2)
PPTaccma365 <- filter(x=CRLA.PPTacczoo, filter=rep(x=1/365,times=365), sides=2)
PPTaccma30 <- filter(x=CRLA.PPTacczoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.PPTacczoo,col="grey")
lines(PPTaccma365,col="red",lwd=2)
lines(PPTaccma30,col="blue",lwd=2)
PPTincma365 <- filter(x=CRLA.PPTinczoo, filter=rep(x=1/365,times=365), sides=2)
PPTincma30 <- filter(x=CRLA.PPTinczoo, filter=rep(x=1/90,times=90), sides=2)
plot(CRLA.PPTinczoo,col="grey")
lines(PPTincma365,col="red",lwd=2)
lines(PPTincma30,col="blue",lwd=2)
Trend lines in R are created using the linear model function lm()
CRLA.time=time(CRLA.Tavgts)
CRLA.TavgLM=lm(CRLA.Tavgts~CRLA.time)
plot(CRLA.Tavgts,col="grey")
# Heck, let's add the trend line from our linear model!
abline(CRLA.TavgLM, col="black",lwd=2, lty="dashed") # So satisfying! - Thanks Andy
legend("topleft", paste("Slope (?F per year) =",round(CRLA.TavgLM$coefficients[[2]]*365, 3)))
acf(CRLA.Tavgts)
While there are many cool things to do with autocorrelation, I first wanted to process my data from daily to monthly and annual for better observation of the autocorrelation functions. Using the hydroTSM package.
#hydroTSM package
#install.packages("hydroTSM")
#install.packages("sp")
#I think this packages manipulates data.frame objects using some sweet functions like:
#subdaily2daily()
#daily2annual()
#etc.
library(hydroTSM)
## Loading required package: xts
library(sp)
## Using hydroplot to form summary statistics from CRLA.SNOTEL zoo data
## To make these plots better, I should clean up the data to remove partial years (which throw off the annual mean values)
hydroplot(CRLA.Tavgzoo, FUN=mean, ylab= "Tavg", var.unit = "?F")
hydroplot(CRLA.Tmaxzoo, FUN=mean, ylab= "Tmax", var.unit = "?F")
hydroplot(CRLA.Tminzoo, FUN=mean, ylab= "Tmin", var.unit = "?F")
hydroplot(CRLA.SWEzoo, FUN=mean, ylab= "SWE", var.unit = "Inches")
hydroplot(CRLA.PPTinczoo, FUN=mean, ylab= "PPTinc", var.unit = "Inches")
hydroplot(CRLA.PPTacczoo, FUN=mean, ylab= "PPTacc", var.unit = "Inches")
#CRLA.Tmaxannual=daily2annual(CRLA.Tmaxzoo)
## CRLA daily2annual
#CRLA.Tmaxannual=daily2annual(CRLA.Tmaxzoo, FUN=mean, na.rm = TRUE, date.fmt="%Y-%m-%d", out.fmt = "%Y")
## Yet another time series object type!
## Use xts() and bring the dates in from a string using as.Date()
CRLA.Date=as.Date(CRLA.SNOTEL$Date, "%m/%d/%Y")
# Do them individually
CRLA.Tmaxxts=xts(CRLA.SNOTEL$Tmax, order.by=CRLA.Date)
head(CRLA.Tmaxxts)
## [,1]
## 0000-10-01 57
## 0000-10-02 59
## 0000-10-03 62
## 0000-10-04 61
## 0000-10-05 62
## 0000-10-06 65
# Do it as a matrix?
CRLA.xts=xts(CRLA.SNOTEL[,2:7], order.by=CRLA.Date)
# Sub-daily to annual ts
CRLA.Tmaxannual=daily2annual(CRLA.Tmaxxts, FUN=mean, na.rm=TRUE)
CRLA.Tmaxmonthly=daily2monthly(CRLA.Tmaxxts, FUN=mean, na.rm=TRUE)
plot(CRLA.Tmaxannual)
plot(CRLA.Tmaxmonthly)
# Do it as a matrix!
CRLA.annual=daily2annual(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.annual)
CRLA.monthly=daily2monthly(CRLA.xts, FUN=mean, na.rm=TRUE)
plot(CRLA.monthly)
## Let's look at PPTinc individually and use the sum function
CRLA.PPTincxts=xts(CRLA.SNOTEL$PPTinc, order.by=CRLA.Date)
CRLA.PPTincannual=daily2annual(CRLA.PPTincxts, FUN=sum, na.rm=TRUE)
CRLA.PPTincmonthly=daily2monthly(CRLA.PPTincxts, FUN=sum, na.rm=TRUE)
plot(CRLA.PPTincannual)
plot(CRLA.PPTincmonthly)
CRLA.monthlyacf=acf(coredata(CRLA.monthly))
str(CRLA.monthlyacf)
## List of 6
## $ acf : num [1:15, 1:6, 1:6] 1 0.8223 0.4521 0.0605 -0.279 ...
## $ type : chr "correlation"
## $ n.used: int 175
## $ lag : num [1:15, 1:6, 1:6] 0 1 2 3 4 5 6 7 8 9 ...
## $ series: chr "coredata(CRLA.monthly)"
## $ snames: chr [1:6] "SWE" "PPTacc" "Tmax" "Tmin" ...
## - attr(*, "class")= chr "acf"