Question 1

Create a time series object with start date January 1950 and monthly frequency using this data.

# Monthly observations, so frequency = 12; the first observation is January 1950.
NINO <- ts(data = NINO, frequency = 12, start = c(1950, 1))

Question 2

Plot the data.

# Time plot of the monthly series against calendar date.
# Re-wrapping NINO keeps this chunk runnable on its own; it uses the same
# start and frequency as the original construction.
NINO <- ts(NINO, start = c(1950, 1), frequency = 12)
# One date per observation, used as the x-axis.
t <- as.Date.ts(NINO)
# ggplot2 needs a data frame. The values are stored under an explicit column
# name instead of relying on the auto-generated name `X25.01`.
# Assumes NINO is a univariate series -- TODO confirm.
NIN <- data.frame(Temperature = as.numeric(NINO), Date = t)
# Columns are referenced by bare name inside aes(); `NIN$...` inside aes()
# is discouraged by ggplot2.
ggplot(data = NIN, aes(x = Date, y = Temperature)) +
  geom_line(colour = "red") +
  theme_economist() +
  xlab("Year") +
  ylab("Temperature") +
  ggtitle("Surface Sea Temperatures Nino Region")

Histogram of Nino

# Histogram of the monthly temperature values (bin width 0.5).
# Re-wrapping NINO keeps this chunk runnable on its own.
NINO <- ts(NINO, start = c(1950, 1), frequency = 12)
# One date per observation (kept alongside the values in the data frame).
t <- as.Date.ts(NINO)
# ggplot2 needs a data frame. The values are stored under an explicit column
# name instead of relying on the auto-generated name `X25.01`.
# Assumes NINO is a univariate series -- TODO confirm.
NIN <- data.frame(Temperature = as.numeric(NINO), Date = t)
# The column is referenced by bare name inside aes(); `NIN$...` inside aes()
# is discouraged by ggplot2.
ggplot(data = NIN, aes(x = Temperature)) +
  geom_histogram(binwidth = .5, color = "red") +
  theme_economist() +
  ylab("Frequency") +
  xlab("Temperature") +
  ggtitle("Surface Sea Temperatures Nino Region")

Question 3

# Box-Cox transformation (x^lambda - 1) / lambda, here with lambda = 3.
# lambda is named once so the exponent and the divisor cannot drift apart.
lambda <- 3
BOXCOX <- (NINO^lambda - 1) / lambda
# One date per transformed observation, used as the x-axis.
t <- as.Date.ts(BOXCOX)
# ggplot2 needs a data frame. The transformed values are stored under an
# explicit column name instead of relying on the auto-generated `X25.01`.
# Assumes NINO is a univariate series -- TODO confirm.
NIN <- data.frame(BOXCOX = as.numeric(BOXCOX), Date = t)

# Time plot of the transformed series. Columns are referenced by bare name
# inside aes(); `NIN$...` inside aes() is discouraged by ggplot2.
ggplot(data = NIN, aes(x = Date, y = BOXCOX)) +
  geom_line(colour = "red") +
  theme_economist() +
  xlab("Year") +
  ylab("Surface Sea Temperatures Nino Region") +
  ggtitle("Box Cox Transformation Plot")

# Histogram of the Box-Cox transformed values (bin width 100).
ggplot(data = NIN, aes(x = BOXCOX)) +
  geom_histogram(binwidth = 100, color = "red") +
  theme_economist() +
  ylab("Frequency") +
  xlab("Temperature") +
  ggtitle("Surface Sea Temperatures Nino Region")

# First difference of the logged series. Taking the difference of the log
# tends to suppress the larger fluctuations that occur over portions of the
# series where the underlying values are larger.
Differenced_NINO <- diff(log(NINO))
# One date per differenced observation (one fewer than the original series).
t <- as.Date.ts(Differenced_NINO)
# ggplot2 needs a data frame. Explicit column names are used instead of the
# auto-generated `X25.01` and the bare `t`.
# Assumes NINO is a univariate series -- TODO confirm.
NIN <- data.frame(Differenced_NINO = as.numeric(Differenced_NINO), Date = t)

# Time plot of the differenced series. Columns are referenced by bare name
# inside aes(); `NIN$...` inside aes() is discouraged by ggplot2.
ggplot(data = NIN, aes(x = Date, y = Differenced_NINO)) +
  geom_line(colour = "red") +
  theme_economist() +
  xlab("Year") +
  ylab("Differenced Surface Sea Temperatures Nino Region") +
  ggtitle("Differenced Data Plot")

# Histogram of the differenced data (bin width 0.01).
ggplot(data = NIN, aes(x = Differenced_NINO)) +
  geom_histogram(binwidth = .01, color = "red") +
  theme_economist() +
  ylab("Frequency") +
  xlab("Temperature") +
  ggtitle("Surface Sea Temperatures Nino Region")

After looking at the histograms of the Box Cox transformation and of the differenced data, I do not observe a noticeable improvement in normality over the original data set. However, the Box Cox transformation seemed to approximate normality better than the differenced data, which seemed to move further from normality and also showed outliers.

Question 4

# Overlay two LOESS smoothers on the monthly series.
# Re-wrapping NINO keeps this chunk runnable on its own.
NINO <- ts(NINO, start = c(1950, 1), frequency = 12)
# One date per observation, used as the x-axis.
t <- as.Date.ts(NINO)
# The values are stored under an explicit column name instead of relying on
# the auto-generated name `X25.01`.
# Assumes NINO is a univariate series -- TODO confirm.
NIN <- data.frame(Temperature = as.numeric(NINO), Date = t)

# Two smoothers are drawn: span = .2 (default colour) and span = .15 (green).
# The span is the proportion of nearest neighbours of x included in the LOESS
# weighting scheme; size = .7 only sets the width of the smoother lines.
# method = "loess" is spelled out so the span is always honoured and the
# smoother does not depend on ggplot2's automatic method choice.
ggplot(data = NIN, aes(x = Date, y = Temperature)) +
  ggtitle("Lowess Smoother") +
  geom_line(colour = "red") +
  geom_smooth(method = "loess", formula = y ~ x, span = .2, size = .7) +
  geom_smooth(
    method = "loess", formula = y ~ x, span = .15,
    color = "GREEN", size = .7
  ) +
  theme_economist() +
  xlab("Year") +
  ylab("Surface Sea Temperatures Nino Region")

I used the LOESS smoother, which clearly shows that cyclicality with periods longer than 2-4 years seems to exist.

Question 5

Plot a periodogram.

# Line plot of the periodogram: spectral estimate against frequency.
# `Periodogram` is created outside this excerpt; only its `freq` and `spec`
# components are used here.
periodogram_df <- data.frame(
  freq = Periodogram$freq,
  spec = Periodogram$spec
)

# The y-axis shows the spectral estimates held in `spec`, so it is labelled
# "Spectrum" rather than "Specification".
ggplot(data = periodogram_df, aes(x = freq, y = spec)) +
  geom_line(colour = "red") +
  theme_economist() +
  xlab("Frequency") +
  ylab("Spectrum") +
  ggtitle("Periodogram")

# Convert the three strongest periodogram peaks from frequency to period
# (period = 1 / frequency).
# NOTE(review): the periods are in whatever time unit Periodogram$freq uses.
# They are presumably months (cycles per observation), which would make these
# cycles of about 1, 5 and 3.6 years rather than 12 years each -- confirm
# against how Periodogram was computed.
1/ 0.083333333
## [1] 12
# At the peak value 105.87680864 we observe a cycle with a period of 12 time units.
1/0.016666667
## [1] 60
# At the peak value 67.08855685 we observe a cycle with a period of 60 time units.
1/0.023333333
## [1] 42.85714
# At the peak value 64.9469622 we observe a cycle with a period of about 42.86 time units.

Question 6

Show a monthplot of the data.

# Month plot: one sub-series per calendar month, to compare seasonal levels.
monthplot(
  x = NINO,
  ylab = "Surface Sea Temperatures Nino Region"
)

We definitely seem to see an upward trend in sea surface temperatures from January through April.

Question 7

Fit a regression model with monthly dummies.

# Position of each observation within the yearly cycle (1-12), as a factor.
Monthlyfactor <- factor(cycle(NINO))
# Expand the factor into indicator columns, one per month, and drop the
# leading column of ones so that lm() supplies the intercept itself.
Dummies <- model.matrix(NINO ~ Monthlyfactor)[, -1]
# Regress the series on the monthly indicators; the first month is absorbed
# into the intercept.
Fit <- lm(NINO ~ Dummies)

summary(Fit)
## 
## Call:
## lm(formula = NINO ~ Dummies)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2378 -0.5698 -0.0598  0.4832  2.7982 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             26.7230     0.1254 213.118  < 2e-16 ***
## DummiesMonthlyfactor2    0.4350     0.1773   2.453   0.0145 *  
## DummiesMonthlyfactor3    0.9532     0.1773   5.375 1.11e-07 ***
## DummiesMonthlyfactor4    0.9668     0.1773   5.452 7.35e-08 ***
## DummiesMonthlyfactor5    0.8096     0.1773   4.566 6.08e-06 ***
## DummiesMonthlyfactor6    0.3614     0.1773   2.038   0.0420 *  
## DummiesMonthlyfactor7   -0.0096     0.1773  -0.054   0.9568    
## DummiesMonthlyfactor8   -0.1862     0.1773  -1.050   0.2941    
## DummiesMonthlyfactor9   -0.1312     0.1773  -0.740   0.4597    
## DummiesMonthlyfactor10  -0.2152     0.1782  -1.208   0.2277    
## DummiesMonthlyfactor11  -0.2006     0.1782  -1.125   0.2610    
## DummiesMonthlyfactor12  -0.1712     0.1782  -0.960   0.3373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8866 on 585 degrees of freedom
## Multiple R-squared:  0.2074, Adjusted R-squared:  0.1925 
## F-statistic: 13.92 on 11 and 585 DF,  p-value: < 2.2e-16

Question 8

Fit several seasonal arima models.

# auto.arima() searches over candidate ARIMA models and returns the best one
# according to an information criterion (AIC / AICc / BIC). It is used here
# as a starting point for the seasonal models fitted below.
auto.arima(NINO)
## Series: NINO 
## ARIMA(4,0,2)(2,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1      ar2     ar3     ar4      ma1     ma2    sar1    sar2
##       2.1916  -1.3533  0.0332  0.1183  -1.1675  0.1852  0.3064  0.2886
## s.e.  0.4888   0.9913  0.5707  0.0690   0.4990  0.4807  0.0436  0.0445
##       intercept
##         26.9383
## s.e.     0.0624
## 
## sigma^2 estimated as 0.1211:  log likelihood=-220.05
## AIC=460.09   AICc=460.47   BIC=504.01
# Baseline: ARMA(2,3) with no seasonal component.
x <- arima(NINO, order = c(2, 0, 3))

# Monthly seasonal ARIMA model: same ARMA(2,3) core plus a seasonal AR(2)
# term with a period of 12 observations.
x_MONTHLY <- arima(
  NINO,
  order = c(2, 0, 3),
  seasonal = list(order = c(2, 0, 0), period = 12)
)

# Quarterly seasonal ARIMA model: same core plus a seasonal AR(2) term with
# a period of 4 observations.
# NOTE(review): on monthly data a period of 4 is a four-month cycle; a
# calendar quarter would be a period of 3 -- confirm which was intended.
x_QUARTERLY <- arima(
  NINO,
  order = c(2, 0, 3),
  seasonal = list(order = c(2, 0, 0), period = 4)
)

# tsdiag.Arima returns the standardized residuals, the ACF of the residuals,
# and the p-values associated with the Q-statistic.
tsdiag.Arima(x)

tsdiag.Arima(x_MONTHLY)

tsdiag.Arima(x_QUARTERLY)

# Based on the ACF of the residuals I would prefer the monthly dummies,
# because without them we notice far more departures from the model
# assumptions.

# Refit the monthly seasonal specification with sarima(); the positional
# arguments are the orders 2, 0, 3 (non-seasonal), 2, 0, 0 (seasonal) and the
# seasonal period 12. details = FALSE is passed to turn off the detailed
# output.
x <- sarima(NINO, 2, 0, 3, 2, 0, 0, 12, details = FALSE)