R Markdown

sunspot<-readRDS("C://Users/Nattybug/Downloads/sunspots.rds")
sun.train<-window(sunspot, end = 1950)  
sun.test<-window(sunspot, start = 1950)

#plotting
#The red data will be our training window and the blue will be the data we test the forecast with
plot(sunspot,col="black",ylab="Sunspots (n)")
lines(sun.train,col="red",lty="dotted",lwd=2)
lines(sun.test,col="blue",lty="dotted",lwd=2)

 #forecasting
#Instead of trying to run an ARMA with a moving average component, I practiced with the AR function to find the best fit p
data(sunspot.year)
sunspot.ar <- ar(sunspot.year)
sunspot.ar
## 
## Call:
## ar(x = sunspot.year)
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  1.1305  -0.3524  -0.1745   0.1403  -0.1358   0.0963  -0.0556   0.0076  
##       9  
##  0.1941  
## 
## Order selected 9  sigma^2 estimated as  267.5
#The AR function has found that the least wrong model is a 9th order AR model, where the sunspots are strongly correlated every 9th year

#Now we can forecast using that least wrong model to predict 3 cycles (27 years) into the future
sunspot.forecast <- predict(sunspot.ar, n.ahead = 27)
plot(sunspot.year,xlim=c(1700, 2015),col="purple",lwd=1.5, 
     ylab="Sunspots (n)")
lines(sunspot.forecast$pred,col="green",lwd=1.5)

#I wanted to see what happens when we predict much farther into the future, to see if the cycles forecasted really flattened out and reflected a reduced "echo"
sunspot.forecast2<-predict(sunspot.ar,n.ahead=54)
plot(sunspot.year,xlim=c(1700,2060),col="purple",lwd=1.5,ylab="sunspots")
lines(sunspot.forecast$pred,col="green",lwd=1.5)
lines(sunspot.forecast2$pred,col="red",lwd=1.5,lty="dotted")

#Its true, as the forecasting predicts farther out, the amplitude of the cycles lessens and becomes more flat
#This makes sense because farther into the future, the prediction is based off another predicted factor, not off any real data
#Adding in the standard error window to get a better picture of the "wiggle room" or possible range of future values
upper <- sunspot.forecast$pred + sunspot.forecast$se
lower <- sunspot.forecast$pred - sunspot.forecast$se
xx <- c(time(sunspot.forecast$pred), rev(time(sunspot.forecast$pred)))
yy <- c(upper,rev(lower))
plot(sunspot.year,xlim=c(1970, 2015),col="purple",lwd=1.5, type="b",pch=20,
     ylab="Sunspots (n)")
polygon(xx,yy,col="lightgreen",border = NA)
points(sunspot.forecast$pred,col="darkgreen",lwd=1.5,pch=20)
lines(sunspot.forecast$pred,col="darkgreen",lwd=1.5)

##Now I'm going to re-do the AR model for only the training part of the data
data(sunspot.year)
sunspot.train<-window(sunspot.year,end=1950)
sunspot.train.ar <- ar(sunspot.train)
sunspot.train.ar
## 
## Call:
## ar(x = sunspot.train)
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  1.2092  -0.4558  -0.1265   0.1147  -0.1081   0.0864  -0.0936   0.0786  
##       9  
##  0.1172  
## 
## Order selected 9  sigma^2 estimated as  220
sunspot.test<-window(sunspot.year,start=1950)
##The AR model for just the training window was also AR(9), or the least bad model. I will continue with forecasting from the training data

sunspot.train.forecast <- predict(sunspot.train.ar, n.ahead = 27)
plot(sunspot.train,xlim=c(1700, 2015),col="purple",lwd=1.5, 
     ylab="Sunspots (n)")
lines(sunspot.train.forecast$pred,col="green",lwd=1.5)
lines(sunspot.test,col="red",lty="dotted",lwd=1)

##The forecasted data (green) has a smaller amplitude than the testing data (red) we set aside. This is what we expected to see with the forecasted data.









#Woodhouse data
woodhouse<-read.csv("C://Users/Nattybug/Downloads/woodhouse.csv")
woodhouse.ts <- ts(woodhouse[,-1], start=1906, frequency=1)
tsp(woodhouse.ts)
## [1] 1906 2012    1
ts.plot(woodhouse.ts)

plot(woodhouse.ts,par(c=1,1))

LeesWYflow<-ts(woodhouse[2])
year<-ts(woodhouse[1])

#Lets get some residuals
ols1<-lm(LeesWYflow~year)
summary(ols1)
## 
## Call:
## lm(formula = LeesWYflow ~ year)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9069209 -3090093  -542999  2878269 10108319 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 78446044   25757296   3.046  0.00294 **
## year          -32445      13147  -2.468  0.01520 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4200000 on 105 degrees of freedom
## Multiple R-squared:  0.05483,    Adjusted R-squared:  0.04583 
## F-statistic: 6.091 on 1 and 105 DF,  p-value: 0.0152
#look at the autocorrelation for the residuals
acf(residuals(ols1))

pacf(residuals(ols1)) 

#nothing after lag 1, looks like an AR(1) model