Predict the sunspot data from 1950 to the present using the predict function.

data(sunspot.year)
sun.train <- window(sunspot.year, end = 1950)  
sun.test <- window(sunspot.year, start = 1950)
sunspot.ar <- ar(sun.train)
sunspot.ar
## 
## Call:
## ar(x = sun.train)
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  1.2092  -0.4558  -0.1265   0.1147  -0.1081   0.0864  -0.0936   0.0786  
##       9  
##  0.1172  
## 
## Order selected 9  sigma^2 estimated as  220
sunspot.forecast <- predict(sunspot.ar, n.ahead = 27)
plot(sunspot.year, xlim = c(1700, 2015), col = "grey", lwd = 1.5,
     ylab = "Sunspots (n)")
lines(sunspot.forecast$pred, col = "green", lwd = 1.5)
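
The list returned by predict() also carries standard errors, so approximate prediction intervals can be overlaid on the same plot. A minimal sketch, assuming a normal approximation for the 95% bounds:

# overlay approximate 95% prediction intervals from the standard errors
# returned by predict() (normal approximation; illustrative only)
upper <- sunspot.forecast$pred + 1.96 * sunspot.forecast$se
lower <- sunspot.forecast$pred - 1.96 * sunspot.forecast$se
lines(upper, col = "green", lty = 2)
lines(lower, col = "green", lty = 2)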

Because the ARMA modelling demonstrated earlier selected an AR(9) as the best fit to the sunspot data, I will use this structure (ARIMA(9,0,0)) to forecast the withheld data and compare the skill of a model fit to the withheld data against the skill of the model fit to the training data. To do this I will use the forecast function, and the root mean squared error (RMSE) of each fit is reported with the accuracy call. The RMSE of the training-period model is 14.04139, and the RMSE of the test-period model is 17.61562. A more direct out-of-sample check, scoring the training-period forecasts against the withheld observations, is sketched after the two model summaries below.

fit <- arima(sun.train, order=c(9, 0, 0))
fit
## 
## Call:
## arima(x = sun.train, order = c(9, 0, 0))
## 
## Coefficients:
##          ar1      ar2      ar3     ar4      ar5     ar6      ar7     ar8
##       1.2003  -0.4387  -0.1553  0.1688  -0.1422  0.0502  -0.0388  0.0620
## s.e.  0.0627   0.0996   0.1050  0.1072   0.1076  0.1088   0.1091  0.1042
##          ar9  intercept
##       0.1313    44.6113
## s.e.  0.0658     5.2650
## 
## sigma^2 estimated as 197.2:  log likelihood = -1021.02,  aic = 2064.05
res <- residuals(fit)
summary(res)
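
A hedged follow-up check: a Ljung-Box test on the residuals looks for autocorrelation that the AR(9) terms failed to absorb (the lag and fitdf choices below are illustrative, not part of the original analysis).

# Ljung-Box test for leftover autocorrelation in the ARIMA(9,0,0) residuals
Box.test(res, lag = 20, type = "Ljung-Box", fitdf = 9)
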
#predictive accuracy
library(forecast)
accuracy(fit)
##                     ME     RMSE      MAE  MPE MAPE  MASE         ACF1
## Training set 0.1986178 14.04139 10.58618 -Inf  Inf 0.637 -0.004203847
# predict the next 67 observations
plot(forecast(fit, 67))

fit2 <- arima(sun.test, order=c(9, 0, 0))
fit2
## 
## Call:
## arima(x = sun.test, order = c(9, 0, 0))
## 
## Coefficients:
##          ar1      ar2      ar3      ar4      ar5     ar6      ar7      ar8
##       1.0669  -0.4123  -0.1504  -0.0265  -0.0342  0.1393  -0.1113  -0.2937
## s.e.  0.1534   0.2313   0.2405   0.2433   0.2487  0.2584   0.2665   0.2554
##          ar9  intercept
##       0.4110    79.6430
## s.e.  0.1674     6.8596
## 
## sigma^2 estimated as 310.3:  log likelihood = -170.01,  aic = 362.02
accuracy(fit2)
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -1.158715 17.61562 14.08266 -24.44489 43.85389 0.5295806
##                    ACF1
## Training set 0.04481244
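
A more direct measure of forecast skill is to score the training-period forecasts against the withheld observations themselves. A minimal sketch using the forecast package's two-argument accuracy() call, with the horizon set to the length of the test window:

# score the training-period model on the held-out data
fc <- forecast(fit, h = length(sun.test))
accuracy(fc, sun.test)  # reports training-set and test-set error measures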


### Woodhouse data exploration

This was a very neat paper!

my.data<- read.csv(file="C:\\Users\\jesse_000\\Desktop\\Grad\\ESCI 597\\Homework\\RPubs - Homework 3_files\\woodhouse.csv", header=TRUE, sep=",")
par(mfrow=c(2,1))
plot(LeesWYflow ~ year, type = "b", data = my.data)
plot(OctAprP ~ year, type = "b", data = my.data)


#This plot is AWESOME
library(moments)
## Warning: package 'moments' was built under R version 3.2.5
kt <-kurtosis(my.data$LeesWYflow)
sk <- skewness(my.data$LeesWYflow)
my.xlim <- range(my.data$LeesWYflow)
h <- hist(my.data$LeesWYflow, breaks=20, col="lightblue", xlab="Stream Flow (m^3/s)",
        main="", xlim=my.xlim)
xfit<-seq(min(my.data$LeesWYflow),max(my.data$LeesWYflow),length=100) 
yfit<-dnorm(xfit,mean=mean(my.data$LeesWYflow),sd=sd(my.data$LeesWYflow)) 
yfit <- yfit*diff(h$mids[1:2])*length(my.data$LeesWYflow) 
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(my.data$LeesWYflow, horizontal=TRUE,  outline=TRUE,  axes=FALSE,
        ylim=my.xlim, col = "lightgreen", add = TRUE, boxwex=3)
text(x = 576, y=15, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x = 576, y=14, labels = paste("Skewness=",round(sk,2)),pos = 4)
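
As a hedged complement to the kurtosis and skewness statistics, a quantile-quantile plot and a Shapiro-Wilk test give another view of how far the flow record departs from normality:

# normality diagnostics for the water-year flow series
qqnorm(my.data$LeesWYflow)
qqline(my.data$LeesWYflow)
shapiro.test(my.data$LeesWYflow)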

Let's start with a basic linear model to get warmed up, and then we'll move on to the GLS model.

par(mfrow=c(2,1))
flow.lm <- lm(LeesWYflow ~ OctAprP, data = my.data)
acf(residuals(flow.lm))
pacf(residuals(flow.lm))

library(nlme)
flow.lm<-lm(LeesWYflow~OctAprP,data = my.data)
ar1 <- ar(residuals(flow.lm))$ar
ar1
## [1] 0.3659556 0.1572489
gls1 <- gls(LeesWYflow ~ OctAprP, correlation = corAR1(ar1), data = my.data)  # give an initial value; note corAR1() expects a single lag-1 value, so ar1[1] is the relevant element
acf(resid(gls1, type = "normalized"))
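
One way to ask whether the AR(1) error structure is worth keeping is to refit the GLS model without it and compare the two fits. A minimal sketch (both models use the default REML fit with identical fixed effects, so the comparison is over the correlation structure only):

# compare GLS fits with and without the AR(1) error correlation
gls0 <- gls(LeesWYflow ~ OctAprP, data = my.data)
anova(gls0, gls1)  # AIC/BIC and likelihood-ratio test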

summary(flow.lm)
## 
## Call:
## lm(formula = LeesWYflow ~ OctAprP, data = my.data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5239145 -1691189  -301282  1702819  7394534 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3037032    1274631  -2.383    0.019 *  
## OctAprP        81921       5719  14.324   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2514000 on 105 degrees of freedom
## Multiple R-squared:  0.6615, Adjusted R-squared:  0.6583 
## F-statistic: 205.2 on 1 and 105 DF,  p-value: < 2.2e-16
summary(gls1)
## Generalized least squares fit by REML
##   Model: LeesWYflow ~ OctAprP 
##   Data: my.data 
##        AIC      BIC    logLik
##   3395.404 3408.673 -1692.702
## 
## Correlation Structure: AR(1)
##  Formula: ~1 
##  Parameter estimate(s):
##       Phi      <NA> 
## 0.4643182 0.1572489 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept) -1902931.7 1076429.6 -1.767818    0.08
## OctAprP        76616.7    4562.8 16.791726    0.00
## 
##  Correlation: 
##         (Intr)
## OctAprP -0.927
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0597839 -0.7152264 -0.1221678  0.6774445  2.9347823 
## 
## Residual standard error: 2537929 
## Degrees of freedom: 107 total; 105 residual
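
To see how the AR(1) errors change the inference on the precipitation slope, the OLS and GLS interval estimates can be put side by side. A short sketch using confint() for the lm fit and nlme's intervals() for the gls fit:

# compare interval estimates from the OLS and GLS fits
confint(flow.lm)
intervals(gls1)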

Other plots

Looking at the cumulative sums of flow and precipitation turns out not to be super useful, but the plots are included below anyway.

par(mfrow=c(2,1))
csum1 <- cumsum(my.data$LeesWYflow)
plot(my.data$year, csum1)
csum2 <- cumsum(my.data$OctAprP)
plot(my.data$year, csum2)