n <- 100
epsilon <- rnorm(n = n,mean = 0,sd = 1) # white noise, setting mean and standard deviation
x1 <- numeric(length = n)#creating 4 time series
x2 <- numeric(length = n)
x3 <- numeric(length = n)
x4 <- numeric(length = n)
for(i in 2:n){
  x1[i] <- 0.95 * x1[i-1] + epsilon[i]  
  x2[i] <- 0.75 * x2[i-1] + epsilon[i]  
  x3[i] <- 0.50 * x3[i-1] + epsilon[i]  
  x4[i] <- 0.25 * x4[i-1] + epsilon[i]  
}
ts.plot(x1)  #plotting first order time series number 1

acf(x1) #correlogram for phi=0.95, which shows up at lag 1 and linearly decays at higher lags

pacf(x1)#Partial autocorrelation shows the autocorrelation left after taking the AR(1) into account, the lags fall to zero of T=1 which is consistent with an AR(1) model.

ts.plot(x2)#plotting the second AR(1) time series with phi=0.75

acf(x2) #Autocorrelation of time series 2, phi=0.75 at time step 1, decreasing to nearly zero at T=4

pacf(x2)#Partial autocorrelation after accounting for AR(1) shows a drop to 0 after the first lag, indicative of a first order autocorrelation

#I'm starting to see a pattern here, the smaller the starting value of phi gets, the few lags autocorrelation is echoed in, the echo ends sooner as later time steps are more less of a function of earlier time steps
ts.plot(x3)#plot the third simulated time series

acf(x3)#phi=0.5 autocorrelation of third time series shows a drop by lag 3 in the echo

pacf(x3) #partial autocorrelation shows a drop after lag 1 after accounting for the AR(1) model, so most of the autocorrelation can be explained by the first function

ts.plot(x4) #plot 4th simulate time series, with lowest starting value of phi

acf(x4)#autocorrelation is first order, with phi=0.25

pacf(x4)#partial correlation shows thats after accounting for AR(1), there is no autocorrelation left so the model is a good fit.

#I tried using the arima.sim function to simulate one of the time series with phi=0.75 and I think it worked.
ts.sim <- arima.sim(list(order = c(1,1,0), ar = 0.75), innov=rnorm(100,sd=0.3), n = 100)
ts.plot(ts.sim) #I found this much easier than making a loop, as long as you know what each component is doing.

#Rolling the AR(2)

#I would expect the partial correlogram to have significance past the C.I. for the first two lags, because after the AR(1) is accounted for we should still see some autocorrelation if the underlying process is AR(2)

n <- 100
epsilon <- rnorm(n = n,mean = 0,sd = 1) # white noise
x1 <- numeric(length = n)
x2 <- numeric(length = n)
x3 <- numeric(length = n)
x4 <- numeric(length = n)
for(i in 2:n){
  x1[i] <- 0.1 * x1[i-1] + epsilon[i]  
  x2[i] <- 0.2 * x2[i-1] + 0.95*x1[i-1]+epsilon[i]  
  x3[i] <- 0.50 * x3[i-1] + 0.75*x2[i-1]+epsilon[i]  
  x4[i] <- 0.25 * x4[i-1] + 0.5*x3[i-1]+epsilon[i]}
  
ts.plot(x2)#plotting the AR(2) series

acf(x2)#The ACf plot summarizes what autocorrelation we are looking at

pacf(x2) #The partial correlation shows a significant negative autocorrelation even after accounting for AR(1) at a lag of 2 years. This is what we would expect to see from an AR(2) model where there is a relationship left over even after accounting for the first phi sub 1. I specifically plotted X2 which has a higher phi2 than phi1.This is similar to our sunspot data where there is a cyclical nature and negative autocorrelation is occurring

#Increasing the sd of epsilon resulted in lag 2 and 3 of the partial correlation becoming more significant, which is odd because in other statistics normally a higher sd makes finding significance more difficult.

n <- 100
epsilon <- rnorm(n = n,mean = 0,sd = 10) # white noise
x1 <- numeric(length = n)
x2 <- numeric(length = n)
x3 <- numeric(length = n)
x4 <- numeric(length = n)
for(i in 2:n){
  x1[i] <- 0.1 * x1[i-1] + epsilon[i]  
  x2[i] <- 0.2 * x2[i-1] + 0.95*x1[i-1]+epsilon[i]  
  x3[i] <- 0.50 * x3[i-1] + 0.75*x2[i-1]+epsilon[i]  
  x4[i] <- 0.25 * x4[i-1] + 0.5*x3[i-1]+epsilon[i]}
pacf(x2)

#When phi1 is larger than phi2, the pacf looks more like an AR(1) process which makes sense because that would mean the subsequent time steps are more of a function of phi1 than phi 2.
n <- 100
epsilon <- rnorm(n = n,mean = 0,sd = 10) # white noise
x1 <- numeric(length = n)
x2 <- numeric(length = n)
x3 <- numeric(length = n)
x4 <- numeric(length = n)
for(i in 2:n){
  x1[i] <- 0.1 * x1[i-1] + epsilon[i]  
  x2[i] <- 0.2 * x2[i-1] + 0.95*x1[i-1]+epsilon[i]  
  x3[i] <- 0.5 * x3[i-1] + 0.75*x2[i-1]+epsilon[i]  
  x4[i] <- 0.4 * x4[i-1] + 0.25*x3[i-1]+epsilon[i]}
pacf(x4)

#Global temperature anomalies data set=giss.globe
#reading in global temperature data from the shiny app from 1880-2016, frequency 12

temp<-readRDS("C://Users//Nattybug//Desktop//giss.globe.rds")
tsp(temp)

## [1] 1880.000 2016.833   12.000

ts.plot(temp)#time series shows seasonality and an overall upward trend

acf(temp) #There is strong autocorrelation from month to month, where each measurement is strongly correlted with the time step before it

pacf(temp)#The partial correlation shows that the second lag does not go straight to zero so we could be looking at a second or even third or fourth AR(p)

#Detrending the data with a linear model and work with the residuals

temp.time<-time(temp)
temp.lm<-lm(temp~temp.time)
summary(temp.lm)

## 
## Call:
## lm(formula = temp ~ temp.time)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57814 -0.14040 -0.00303  0.13824  0.82365 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.389e+01  2.364e-01  -58.75   <2e-16 ***
## temp.time    7.142e-03  1.213e-04   58.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1944 on 1641 degrees of freedom
## Multiple R-squared:  0.6786, Adjusted R-squared:  0.6784 
## F-statistic:  3465 on 1 and 1641 DF,  p-value: < 2.2e-16

#The linear coefficient shows that the temperature is trending up at a rate of 0.0071 degrees per month
plot(residuals.lm(temp.lm))

#Residual plot shows a fairly normal distribution, there are no funnel shapes or obvious skewness
acf(residuals.lm(temp.lm))

pacf(residuals.lm(temp.lm))

#Replotted the data using the residuals with acf and pacf
#Autocorrelation showed a subtle difference in that the coefficient started tapering off to a lower phi than working on the raw data. The partial correlation still showed a strong second lag even after accounting for the first, suggesting a AR(2). However, I noticed that the axes for the raw data had a shorter lag window than the residuals, which I was unable to fix.Plotting the correlation on the residuals seemed to give a better look at the correlation as though some noise was removed.

####Do something new and cool###
#Working with sea ice data from the shiny app that has been taken monthly from the Northern hemisphere

seaice<-readRDS("C://Users/Nattybug/Downloads/snow.NHem.rds")
tsp(seaice)

## [1] 1966.833 2016.917   12.000

#summarize the data
summary(seaice)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.089   7.277  28.190  25.380  42.310  51.320

ts.plot(seaice)# time series plot of sea ice data for 50 years shows a seasonal pattern

acf(temp) #There is strong autocorrelation from month to month, where each measurement is strongly correlted with the time step before it

pacf(temp)#The partial correlation shows that the second lag does not go straight to zero so we could be looking at a second or even third or fourth AR(p)

#Detrending the data with a linear model and work with the residuals

temp.time<-time(temp)
temp.lm<-lm(temp~temp.time)
summary(temp.lm)

## 
## Call:
## lm(formula = temp ~ temp.time)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57814 -0.14040 -0.00303  0.13824  0.82365 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.389e+01  2.364e-01  -58.75   <2e-16 ***
## temp.time    7.142e-03  1.213e-04   58.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1944 on 1641 degrees of freedom
## Multiple R-squared:  0.6786, Adjusted R-squared:  0.6784 
## F-statistic:  3465 on 1 and 1641 DF,  p-value: < 2.2e-16

#The linear coefficient shows that the temperature is trending up at a rate of 0.0071 degrees per month
plot(residuals.lm(temp.lm))

#Residual plot shows a fairly normal distribution, there are no funnel shapes or obvious skewness
acf(residuals.lm(temp.lm))

pacf(residuals.lm(temp.lm))

#Replotted the data using the residuals with acf and pacf
#Autocorrelation showed a subtle difference in that the coefficient started tapering off to a lower phi than working on the raw data. The partial correlation still showed a strong second lag even after accounting for the first, suggesting a AR(2). However, I noticed that the axes for the raw data had a shorter lag window than the residuals, which I was unable to fix.Plotting the correlation on the residuals seemed to give a better look at the correlation as though some noise was removed.