Homework Four

Problem 2.6

a.

# Because the mean function, E(x_t) = b0 + b1*t, depends on time t, the process is non-stationary.

# Simulate 200 observations of a linear trend plus standard normal noise.
b0 <- 1                # intercept of the trend
b1 <- 5                # slope of the trend
t <- seq_len(200)      # time index 1, 2, ..., 200
w <- rnorm(length(t))  # one noise draw per time point

# Trend-plus-noise series and its sample autocorrelation function.
xt <- (b1 * t + b0) + w
acf(xt)

b.

# First difference of the trend-plus-noise series xt.
FirstDif <- diff(xt)

# Time plot and sample ACF of the differenced series.
# The unused `q` and `x` data frames were dropped: cycle() of a plain numeric
# vector is constant 1, `q` masked base::q, and neither object was read.
plot.ts(FirstDif)

acf(FirstDif)

# Index the differenced series by its own length rather than a literal 199.
t2 <- seq_along(FirstDif)
df <- data.frame(t2, FirstDif)

# Scatter of the differenced series with a fitted straight line.
# Differencing b0 + b1 * t + w leaves b1 + (w[t] - w[t - 1]), so the series
# has constant mean b1 (not zero) and the fitted line should be roughly flat.
ggplot(df, aes(x = t2, y = FirstDif)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_economist()

a. Done by Hand

b. Done by Hand

c. Done by Hand

Problem 2.8

a.

# Raw and log-transformed varve series, plotted for visual comparison.
plot(varve, main = "varve", ylab = "")

plot(log(varve), main = "log(varve)", ylab = "")

# Index sets for the first and second halves of the series. The split point
# is derived from the series length instead of the literals 317 and 634.
n_varve <- length(varve)
half <- n_varve %/% 2
first_half <- seq_len(half)
second_half <- seq(half + 1, n_varve)

# Sample variance of each half of the raw series.
varve1 <- varve[first_half]
varve2 <- varve[second_half]

varVarve1 <- var(varve1)
varVarve1
## [1] 133.4574
varVarve2 <- var(varve2)
varVarve2
## [1] 594.4904
# The variance of the second half is more than four times that of the first
# half, so the raw series is heteroskedastic: its variability is not constant
# over time.

# Same comparison after taking logs.
LogVarve <- log(varve)
logvarve1 <- LogVarve[first_half]
logvarve2 <- LogVarve[second_half]

varlogVarve1 <- var(logvarve1)
varlogVarve1
## [1] 0.2707217
varlogVarve2 <- var(logvarve2)
varlogVarve2
## [1] 0.451371
# The log transformation stabilizes the variance: the two halves now differ
# by a factor of about 1.7 (0.27 vs 0.45) instead of about 4.5 (133 vs 594).
hist(varve)

hist(LogVarve)

# Normality has also improved as a result of the transformation.

HOW TO DO PROPERLY

b.

# Split log(varve) into six contiguous segments (about 106 observations each)
# and plot each segment separately.
# split() recycles a grouping vector that is shorter than the data, so the
# earlier f <- 1:6 assigned every sixth observation to the same group (and
# warned that the data length is not a multiple of 6) instead of producing
# consecutive time intervals. A full-length grouping vector keeps adjacent
# observations together.
n_seg <- 6L
y <- log(varve)
f <- rep(
  seq_len(n_seg),
  each = ceiling(length(y) / n_seg),
  length.out = length(y)
)
x <- split(y, f)
for (i in seq_along(x)) {
  ts.plot(
    x[[i]],
    gpars = list(
      main = paste("log(varve), segment", names(x)[i]),
      ylab = "log(varve)"
    )
  )
}

# Global temperature series and its sample ACF, for comparison.
ts.plot(gtemp)

acf(gtemp, lag.max = 100)

# Yes, we can observe behavior comparable to that observed in the global temperature records.

c.

# Sample ACF of the log-transformed varve series, out to lag 100.
stats::acf(LogVarve, lag.max = 100)

# Shows evidence of autocorrelation in the log-transformed series.

d.

# First difference of the log-transformed varve series.
u <- diff(LogVarve)

# Plot the differenced log series against its index.
# The data and mapping are given to ggplot() so that geom_point() inherits
# them; it was previously added to an empty ggplot() with nothing to draw,
# which appears to be what triggered the "no non-missing arguments to max"
# warning. as.numeric() drops the ts class, for which ggplot2 reported that
# it could not pick a scale. The index follows the series length rather than
# a literal 633.
t633 <- seq_along(u)
df33 <- data.frame(u = as.numeric(u), t633)
ggplot(df33, aes(x = t633, y = u)) +
  geom_line(color = "red") +
  geom_point() +
  theme_economist() +
  xlab("Time")

# Same plot for the untransformed series.
t634 <- seq_along(varve)
df34 <- data.frame(varve = as.numeric(varve), t634)
ggplot(df34, aes(x = t634, y = varve)) +
  geom_line(color = "red") +
  geom_point() +
  theme_economist() +
  xlab("Time")

# Sample ACF before and after the log/difference transformation.
acf(varve)

acf(u)

# Differencing produces a reasonably stationary series, as can be seen from
# the ACF after the log/differencing transformation.

e. Done by Hand

f. Done by Hand

Problem 2.9

a.

# Detrend SOI: regress the series on its own time index, time(soi). The fitted
# model is stored in `fit`; its residuals are used by the periodogram step.
fit<- lm(soi~time(soi))
# Print the coefficient table and fit statistics (recorded output follows).
summary(fit)
## 
## Call:
## lm(formula = soi ~ time(soi))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.04140 -0.24183  0.01935  0.27727  0.83866 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.70367    3.18873   4.298 2.12e-05 ***
## time(soi)   -0.00692    0.00162  -4.272 2.36e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3756 on 451 degrees of freedom
## Multiple R-squared:  0.0389, Adjusted R-squared:  0.03677 
## F-statistic: 18.25 on 1 and 451 DF,  p-value: 2.359e-05
# Yes, the coefficient for time is negative and significant, therefore we can see a small negative trend in sea surface temperature.

b.

# Periodogram of the detrended SOI series (residuals of the trend regression).
# plot = FALSE (spelled out; `F` can be reassigned) returns the values only.
w <- periodogram(fit$residuals, plot = FALSE)
dat <- data.frame(freq = w$freq, spec = w$spec)

# Plot spectrum against frequency. The data frame columns are mapped by name
# in ggplot() so both layers inherit them; previously aes() reached into
# `w$...` directly and geom_point() was added to an empty ggplot() with
# nothing to draw. The colour name no longer carries a stray leading space.
ggplot(dat, aes(x = freq, y = spec)) +
  geom_line(color = "red") +
  geom_point() +
  theme_economist() +
  ylab("Spec") +
  xlab("Freq")

# Convert the two peak frequencies into periods (1 / frequency).
# NOTE(review): the frequencies are hard-coded, presumably read off the
# periodogram above -- confirm against w$freq.
Frequency1 <- 1 / .083333
# 12 months
Frequency1
## [1] 12.00005
Frequency2 <- 1 / .0166667
Frequency2
## [1] 59.99988
# Therefore the minor peak indicates a cycle of about 60 months, i.e. every 5 years.

Problem 2.11

a.

# Put the oil and gas price series in one data frame, indexed by observation
# number. The index follows the series length instead of a literal 545, and
# as.numeric() strips the ts class so ggplot2 no longer reports
# "Don't know how to automatically pick scale for object of type ts".
t <- seq_along(oil)
df <- data.frame(oil = as.numeric(oil), gas = as.numeric(gas), t)
df2 <- df  # separate copy kept under its original name

# Both price series on one set of axes, coloured by series.
ggplot(df, aes(x = t)) +
  geom_line(aes(y = oil, color = "Oil")) +
  geom_line(aes(y = gas, color = "Gas")) +
  ylab("Price") +
  theme_economist()

# Reference plots: white noise and its 3-point centered moving average.
w <- rnorm(500, 0, 1) # 500 N(0,1) variates
# stats::filter is named explicitly: dplyr::filter masks it once dplyr (or the
# tidyverse) is attached, and that function does not do linear filtering.
v <- stats::filter(w, sides = 2, filter = rep(1 / 3, 3)) # moving average

# Stack the two plots, then restore the previous graphics settings so later
# plots are not forced into the 2 x 1 layout.
old_par <- par(mfrow = c(2, 1))
plot.ts(w, main = "white noise")
plot.ts(v, ylim = c(-3, 3), main = "moving average")
par(old_par)

# These plots of oil and gas do not appear to be stationary, nor do they resemble the white noise from section 1.3; they more closely resemble the moving average plot presented.

b.

# The series should be transformed by taking the first difference of the logs, diff(log(x)), because this makes the magnitude of the shocks easier to see and leaves a series that looks more like white noise.

c.

# Growth rates: first differences of the log prices.
DiffLogOil <- diff(log(oil))

DiffLogGas <- diff(log(gas))

# Plotting data. The index is derived from the differenced series itself
# rather than by dropping element 545 from a previously defined `t`, and
# as.numeric() strips the ts class that ggplot2 cannot pick a scale for.
t <- seq_along(DiffLogOil)
df3 <- data.frame(
  DiffLogGas = as.numeric(DiffLogGas),
  DiffLogOil = as.numeric(DiffLogOil),
  t
)
levels_df <- data.frame(
  t = seq_along(oil),
  oil = as.numeric(oil),
  gas = as.numeric(gas)
)

# Columns are referenced by bare name inside aes(); writing `df$col` there
# bypasses the layer's data and is discouraged by ggplot2.
plot1 <- ggplot(levels_df, aes(x = t)) +
  geom_line(aes(y = oil, color = "Oil")) +
  geom_line(aes(y = gas, color = "Gas")) +
  theme_economist()

plot2 <- ggplot(df3, aes(x = t)) +
  geom_line(aes(y = DiffLogGas, color = "DiffGas")) +
  geom_line(aes(y = DiffLogOil, color = "DiffOil")) +
  theme_economist()

# Growth rates on top, price levels below.
grid.arrange(plot2, plot1, nrow = 2)

# Sample ACFs of each series before and after the transformation.
acf(DiffLogOil)

acf(oil)

acf(DiffLogGas)

acf(gas)

#It appears that there is a clear cyclical pattern that repeats every 24 months. 

d.

# Sample cross-correlation function of the gas and oil growth rates.
stats::ccf(DiffLogGas, DiffLogOil)

# We can see from the CCF that oil and gas, even when transformed, are very highly correlated.

e.

# Lagged scatterplots of the two growth-rate series, up to lag 3.
lag2.plot(DiffLogGas, DiffLogOil, max.lag = 3)

# There seems to be no relationship in the lagged plots.
# Outliers definitely do exist within these plots, although they are very rare.

f. (i.)

# Regress the gas growth rate on the current oil growth rate, its first three
# lags, and an indicator of non-negative oil growth.
poil <- diff(log(oil))
pgas <- diff(log(gas))
# indi is 1 when oil growth is >= 0 and 0 when it is negative.
indi <- ifelse(poil < 0, 0, 1)
# stats::lag is named explicitly: dplyr::lag masks it once dplyr (or the
# tidyverse) is attached and does not shift the time base of a ts object.
# ts.intersect() then keeps only the time points where every series exists.
mess <- ts.intersect(
  pgas,
  poil,
  poilL = stats::lag(poil, -1),
  poilL2 = stats::lag(poil, -2),
  poilL3 = stats::lag(poil, -3),
  indi
)
fit <- lm(pgas ~ poil + poilL + poilL2 + poilL3 + indi, data = mess)
summary(fit)
## 
## Call:
## lm(formula = pgas ~ poil + poilL + poilL2 + poilL3 + indi, data = mess)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18517 -0.02118  0.00040  0.02210  0.34271 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006645   0.003472  -1.914  0.05620 .  
## poil         0.682261   0.058827  11.598  < 2e-16 ***
## poilL        0.106547   0.039220   2.717  0.00681 ** 
## poilL2       0.044673   0.039201   1.140  0.25496    
## poilL3       0.012803   0.039302   0.326  0.74474    
## indi         0.012724   0.005529   2.301  0.02177 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04173 on 535 degrees of freedom
## Multiple R-squared:  0.4579, Adjusted R-squared:  0.4529 
## F-statistic:  90.4 on 5 and 535 DF,  p-value: < 2.2e-16
# It would appear that the positive effect of oil price growth on gas price growth weakens with lag, as can be seen from the shrinking, less significant coefficients on the lagged terms in the regression.

Not DONE

(ii.)

# Regress the gas growth rate on the current and one-period-lagged oil growth
# rate plus an indicator of negative oil growth.
poil <- diff(log(oil))
pgas <- diff(log(gas))
# indi is 1 when oil growth is negative and 0 otherwise.
indi <- ifelse(poil < 0, 1, 0)
# stats::lag is named explicitly: dplyr::lag masks it once dplyr (or the
# tidyverse) is attached and does not shift the time base of a ts object.
mess <- ts.intersect(
  pgas,
  poil,
  poilL = stats::lag(poil, -1),
  indi
)
fit11 <- lm(pgas ~ poil + poilL + indi, data = mess)
summary(fit11)
## 
## Call:
## lm(formula = pgas ~ poil + poilL + indi, data = mess)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18451 -0.02161 -0.00038  0.02176  0.34342 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.005923   0.003117   1.900  0.05794 .  
## poil         0.683127   0.058369  11.704  < 2e-16 ***
## poilL        0.111927   0.038554   2.903  0.00385 ** 
## indi        -0.012368   0.005516  -2.242  0.02534 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04169 on 539 degrees of freedom
## Multiple R-squared:  0.4563, Adjusted R-squared:  0.4532 
## F-statistic: 150.8 on 3 and 539 DF,  p-value: < 2.2e-16
# Regress the gas growth rate on the current and one-period-lagged oil growth
# rate plus an indicator of no or positive oil growth.
poil <- diff(log(oil))
pgas <- diff(log(gas))
# indi is 1 when oil growth is >= 0 and 0 when it is negative.
# The earlier ifelse(poil < 1, 0, 0) returned 0 for every observation, so indi
# was collinear with the intercept and lm() reported its coefficient as NA
# ("1 not defined because of singularities").
# NOTE(review): the intended indicator is assumed to be the complement of the
# one used for fit11 -- confirm.
indi <- ifelse(poil < 0, 0, 1)
# stats::lag is named explicitly: dplyr::lag masks it once dplyr (or the
# tidyverse) is attached and does not shift the time base of a ts object.
mess <- ts.intersect(
  pgas,
  poil,
  poilL = stats::lag(poil, -1),
  indi
)
fit22 <- lm(pgas ~ poil + poilL + indi, data = mess)
summary(fit22)
# NOTE(review): the summary previously recorded here came from the constant
# indicator (indi row NA); re-run to record the output of the corrected fit.
# Residuals of the two indicator regressions, each plotted against its index.
plot(x = fit22$residuals)

plot(x = fit11$residuals)