Predict the sunspot data from 1950 to the present using the predict
function.
data(sunspot.year)
sun.train <- window(sunspot.year, end = 1950)
sun.test <- window(sunspot.year, start = 1950)
sunspot.ar <- ar(sun.train)
sunspot.ar
##
## Call:
## ar(x = sun.train)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 1.2092 -0.4558 -0.1265 0.1147 -0.1081 0.0864 -0.0936 0.0786
## 9
## 0.1172
##
## Order selected 9 sigma^2 estimated as 220
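By default, ar() selects the order that minimizes the AIC. The chosen order and the AIC profile over the candidate orders are stored on the fitted object, which makes the selection easy to inspect:
# order chosen by AIC, and the AIC of each candidate order relative to the best
sunspot.ar$order
sunspot.ar$aic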
sunspot.forecast <- predict(sunspot.ar, n.ahead = 27)
plot(sunspot.year,xlim=c(1700, 2015),col="grey",lwd=1.5,
ylab="Sunspots (n)")
lines(sunspot.forecast$pred,col="green",lwd=1.5)
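The predict output also carries standard errors, so approximate 95% prediction intervals can be overlaid on the same plot. This is a minimal sketch using the sunspot.forecast object from above; the +/- 2*se band assumes roughly normal forecast errors.
# overlay approximate 95% prediction intervals around the forecast
lines(sunspot.forecast$pred + 2 * sunspot.forecast$se, col = "green", lty = 2)
lines(sunspot.forecast$pred - 2 * sunspot.forecast$se, col = "green", lty = 2)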
Because the ar fit above selected an AR(9) model, i.e., an ARIMA(9, 0, 0), I will use this structure to fit the withheld data and compare the skill of that model to the skill of the model fit to the training data. To do this I will use the forecast
function. The root mean squared error of each model is reported using the accuracy
call. The RMSE of the training model is 14.04139, and the RMSE of the model fit to the withheld data is 17.61562.
fit <- arima(sun.train, order=c(9, 0, 0))
fit
##
## Call:
## arima(x = sun.train, order = c(9, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 1.2003 -0.4387 -0.1553 0.1688 -0.1422 0.0502 -0.0388 0.0620
## s.e. 0.0627 0.0996 0.1050 0.1072 0.1076 0.1088 0.1091 0.1042
## ar9 intercept
## 0.1313 44.6113
## s.e. 0.0658 5.2650
##
## sigma^2 estimated as 197.2: log likelihood = -1021.02, aic = 2064.05
res <- residuals(fit)  # residuals of the ARIMA(9, 0, 0) fit
summary(res)
#predictive accuracy
library(forecast)
accuracy(fit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1986178 14.04139 10.58618 -Inf Inf 0.637 -0.004203847
#predict next 67 observations
plot(forecast(fit, 67))
fit2 <- arima(sun.test, order=c(9, 0, 0))
fit2
##
## Call:
## arima(x = sun.test, order = c(9, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 1.0669 -0.4123 -0.1504 -0.0265 -0.0342 0.1393 -0.1113 -0.2937
## s.e. 0.1534 0.2313 0.2405 0.2433 0.2487 0.2584 0.2665 0.2554
## ar9 intercept
## 0.4110 79.6430
## s.e. 0.1674 6.8596
##
## sigma^2 estimated as 310.3: log likelihood = -170.01, aic = 362.02
accuracy(fit2)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.158715 17.61562 14.08266 -24.44489 43.85389 0.5295806
## ACF1
## Training set 0.04481244
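Note that fit2 is estimated on the withheld data itself, so its RMSE above is still an in-sample statistic. A more direct measure of forecast skill is to score forecasts from the training-period model against the held-out observations; a minimal sketch using the fit, sun.train, and sun.test objects from above:
# score the training-period model's forecasts against the withheld data;
# the "Test set" row is the genuine out-of-sample error
accuracy(forecast(fit, h = length(sun.test)), sun.test)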
### Woodhouse data exploration
This was a very neat paper!
my.data<- read.csv(file="C:\\Users\\jesse_000\\Desktop\\Grad\\ESCI 597\\Homework\\RPubs - Homework 3_files\\woodhouse.csv", header=TRUE, sep=",")
par(mfrow=c(2,1))
plot(LeesWYflow~year,type="b", data=my.data)
plot(OctAprP~year,type="b", data=my.data)
#This plot is AWESOME
library(moments)
## Warning: package 'moments' was built under R version 3.2.5
kt <-kurtosis(my.data$LeesWYflow)
sk <- skewness(my.data$LeesWYflow)
my.xlim <- range(my.data$LeesWYflow)
h<-hist(my.data$LeesWYflow, breaks=20, col="lightblue", xlab="Stream Flow (m^3/s)",
main="",xlim=my.xlim)
xfit<-seq(min(my.data$LeesWYflow),max(my.data$LeesWYflow),length=100)
yfit<-dnorm(xfit,mean=mean(my.data$LeesWYflow),sd=sd(my.data$LeesWYflow))
yfit <- yfit*diff(h$mids[1:2])*length(my.data$LeesWYflow)
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(my.data$LeesWYflow, horizontal=TRUE, outline=TRUE, axes=FALSE,
ylim=my.xlim, col = "lightgreen", add = TRUE, boxwex=3)
text(x = 576, y=15, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x = 576, y=14, labels = paste("Skewness=",round(sk,2)),pos = 4)
Let's just start with a basic linear model to get warmed up, and then we'll move on to the GLS model type.
par(mfrow=c(2,1))
flow.lm<-lm(LeesWYflow~OctAprP,data = my.data)
acf(residuals(flow.lm))
pacf(residuals(flow.lm))
library(nlme)
ar1 <- ar(residuals(flow.lm))$ar
ar1
## [1] 0.3659556 0.1572489
gls1 <- gls(LeesWYflow~OctAprP, correlation = corAR1(ar1[1]),data=my.data) #give an initial value; corAR1 takes a single lag-1 coefficient, and ar() actually selected an AR(2)
acf(resid(gls1, type = "normalized"))
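A Ljung-Box test on the normalized residuals gives a formal check that the AR(1) structure has absorbed the autocorrelation; the choice of 10 lags below is an arbitrary assumption:
# formal check for leftover autocorrelation in the normalized GLS residuals
Box.test(resid(gls1, type = "normalized"), lag = 10, type = "Ljung-Box")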
summary(flow.lm)
##
## Call:
## lm(formula = LeesWYflow ~ OctAprP, data = my.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5239145 -1691189 -301282 1702819 7394534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3037032 1274631 -2.383 0.019 *
## OctAprP 81921 5719 14.324 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2514000 on 105 degrees of freedom
## Multiple R-squared: 0.6615, Adjusted R-squared: 0.6583
## F-statistic: 205.2 on 1 and 105 DF, p-value: < 2.2e-16
summary(gls1)
## Generalized least squares fit by REML
## Model: LeesWYflow ~ OctAprP
## Data: my.data
## AIC BIC logLik
## 3395.404 3408.673 -1692.702
##
## Correlation Structure: AR(1)
## Formula: ~1
## Parameter estimate(s):
##       Phi
## 0.4643182
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) -1902931.7 1076429.6 -1.767818 0.08
## OctAprP 76616.7 4562.8 16.791726 0.00
##
## Correlation:
## (Intr)
## OctAprP -0.927
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.0597839 -0.7152264 -0.1221678 0.6774445 2.9347823
##
## Residual standard error: 2537929
## Degrees of freedom: 107 total; 105 residual
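To see what modeling the AR(1) errors actually changes, the coefficient standard errors from the two fits can be put side by side; a small sketch using the flow.lm and gls1 objects from above:
# compare standard errors; the GLS ones account for the autocorrelation
cbind(OLS.se = coef(summary(flow.lm))[, "Std. Error"],
      GLS.se = summary(gls1)$tTable[, "Std.Error"])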
Looking at the cumulative sum of each series is not super useful, but at least now we know that.
par(mfrow=c(2,1))
csum1<-cumsum(my.data$LeesWYflow)
plot(my.data$year,csum1)
csum2<-cumsum(my.data$OctAprP)
plot(my.data$year,csum2)