R Markdown
sunspot<-readRDS("C://Users/Nattybug/Downloads/sunspots.rds")
sun.train<-window(sunspot, end = 1950)
sun.test<-window(sunspot, start = 1950)
#plotting
#The red data will be our training window and the blue will be the data we test the forecast with
plot(sunspot,col="black",ylab="Sunspots (n)")
lines(sun.train,col="red",lty="dotted",lwd=2)
lines(sun.test,col="blue",lty="dotted",lwd=2)

#forecasting
#Instead of trying to run an ARMA with a moving average component, I practiced with the AR function to find the best fit p
data(sunspot.year)
sunspot.ar <- ar(sunspot.year)
sunspot.ar
##
## Call:
## ar(x = sunspot.year)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 1.1305 -0.3524 -0.1745 0.1403 -0.1358 0.0963 -0.0556 0.0076
## 9
## 0.1941
##
## Order selected 9 sigma^2 estimated as 267.5
#The AR function has found that the least wrong model is a 9th order AR model, where the sunspots are strongly correlated every 9th year
#Now we can forecast using that least wrong model to predict 3 cycles (27 years) into the future
sunspot.forecast <- predict(sunspot.ar, n.ahead = 27)
plot(sunspot.year,xlim=c(1700, 2015),col="purple",lwd=1.5,
ylab="Sunspots (n)")
lines(sunspot.forecast$pred,col="green",lwd=1.5)

#I wanted to see what happens when we predict much farther into the future, to see if the cycles forecasted really flattened out and reflected a reduced "echo"
sunspot.forecast2<-predict(sunspot.ar,n.ahead=54)
plot(sunspot.year,xlim=c(1700,2060),col="purple",lwd=1.5,ylab="sunspots")
lines(sunspot.forecast$pred,col="green",lwd=1.5)
lines(sunspot.forecast2$pred,col="red",lwd=1.5,lty="dotted")

#Its true, as the forecasting predicts farther out, the amplitude of the cycles lessens and becomes more flat
#This makes sense because farther into the future, the prediction is based off another predicted factor, not off any real data
#Adding in the standard error window to get a better picture of the "wiggle room" or possible range of future values
upper <- sunspot.forecast$pred + sunspot.forecast$se
lower <- sunspot.forecast$pred - sunspot.forecast$se
xx <- c(time(sunspot.forecast$pred), rev(time(sunspot.forecast$pred)))
yy <- c(upper,rev(lower))
plot(sunspot.year,xlim=c(1970, 2015),col="purple",lwd=1.5, type="b",pch=20,
ylab="Sunspots (n)")
polygon(xx,yy,col="lightgreen",border = NA)
points(sunspot.forecast$pred,col="darkgreen",lwd=1.5,pch=20)
lines(sunspot.forecast$pred,col="darkgreen",lwd=1.5)

##Now I'm going to re-do the AR model for only the training part of the data
data(sunspot.year)
sunspot.train<-window(sunspot.year,end=1950)
sunspot.train.ar <- ar(sunspot.train)
sunspot.train.ar
##
## Call:
## ar(x = sunspot.train)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 1.2092 -0.4558 -0.1265 0.1147 -0.1081 0.0864 -0.0936 0.0786
## 9
## 0.1172
##
## Order selected 9 sigma^2 estimated as 220
sunspot.test<-window(sunspot.year,start=1950)
##The AR model for just the training window was also AR(9), or the least bad model. I will continue with forecasting from the training data
sunspot.train.forecast <- predict(sunspot.train.ar, n.ahead = 27)
plot(sunspot.train,xlim=c(1700, 2015),col="purple",lwd=1.5,
ylab="Sunspots (n)")
lines(sunspot.train.forecast$pred,col="green",lwd=1.5)
lines(sunspot.test,col="red",lty="dotted",lwd=1)

##The forecasted data (green) has a smaller amplitude than the testing data (red) we set aside. This is what we expected to see with the forecasted data.
#Woodhouse data
woodhouse<-read.csv("C://Users/Nattybug/Downloads/woodhouse.csv")
woodhouse.ts <- ts(woodhouse[,-1], start=1906, frequency=1)
tsp(woodhouse.ts)
## [1] 1906 2012 1
ts.plot(woodhouse.ts)

plot(woodhouse.ts,par(c=1,1))

LeesWYflow<-ts(woodhouse[2])
year<-ts(woodhouse[1])
#Lets get some residuals
ols1<-lm(LeesWYflow~year)
summary(ols1)
##
## Call:
## lm(formula = LeesWYflow ~ year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9069209 -3090093 -542999 2878269 10108319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78446044 25757296 3.046 0.00294 **
## year -32445 13147 -2.468 0.01520 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4200000 on 105 degrees of freedom
## Multiple R-squared: 0.05483, Adjusted R-squared: 0.04583
## F-statistic: 6.091 on 1 and 105 DF, p-value: 0.0152
#look at the autocorrelation for the residuals
acf(residuals(ols1))

pacf(residuals(ols1))

#nothing after lag 1, looks like an AR(1) model