HW 1

library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.5

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.3     v fma       2.4  
## v forecast  8.15      v expsmooth 2.3

## Warning: package 'ggplot2' was built under R version 4.0.5

## Warning: package 'forecast' was built under R version 4.0.5

## Warning: package 'fma' was built under R version 4.0.5

## Warning: package 'expsmooth' was built under R version 4.0.5

##

summary(elecdaily)

##      Demand         WorkDay        Temperature   
##  Min.   :166.7   Min.   :0.0000   Min.   : 9.80  
##  1st Qu.:205.4   1st Qu.:0.0000   1st Qu.:16.60  
##  Median :221.0   Median :1.0000   Median :20.30  
##  Mean   :221.3   Mean   :0.6877   Mean   :21.26  
##  3rd Qu.:236.7   3rd Qu.:1.0000   3rd Qu.:24.30  
##  Max.   :347.6   Max.   :1.0000   Max.   :43.20

daily20 <- head(elecdaily,20)

dailyts<-ts(daily20)

autoplot(dailyts)

tslmdaily<- tslm(Demand~Temperature, data=dailyts)
plot(Demand~Temperature, data=dailyts, main= "Demand vs. Temperature")

#The relation ship between demand and temperature is positive meanign that as temperature goes up this causes demand to go up.

1.b

checkresiduals(tslmdaily$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

#There is no correlations for the residuals and they sem to be normally distributed so I would say the relationship

1.c

forecastdemand15<-forecast(tslmdaily,newdata=data.frame(Temperature=15))
forecastdemand15

##    Point Forecast   Lo 80    Hi 80    Lo 95    Hi 95
## 21       140.5701 108.681 172.4591 90.21166 190.9285

forecastdemand35<-forecast(tslmdaily,newdata=data.frame(Temperature=35))
forecastdemand35

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 21       275.7146 245.2278 306.2014 227.5706 323.8586

# I think for the most part these are reasonable but I do quest the 15C point prediction and lower bounds because I do not think the relation is entirely linear. I think its more of an exponenial relation or even more parabolic because if you get too cold you might end ups using more electricty than when hot because you have heaters plugged in.

1.d

autoplot(daily20, facets=TRUE)

daily20 %>%
  as.data.frame() %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

fit <- tslm(Demand ~ Temperature, data=daily20)
checkresiduals(fit)

## 
##  Breusch-Godfrey test for serial correlation of order up to 5
## 
## data:  Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774

forecast(fit, newdata=data.frame(Temperature=c(15,35)))

##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 4.285714       140.5701 108.6810 172.4591  90.21166 190.9285
## 4.428571       275.7146 245.2278 306.2014 227.57056 323.8586

autoplot(elecdaily, facets=TRUE)

elecdaily %>%
  as.data.frame() %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

fit <- tslm(Demand ~ Temperature, data=elecdaily)
checkresiduals(fit)

## 
##  Breusch-Godfrey test for serial correlation of order up to 14
## 
## data:  Residuals from Linear regression model
## LM test = 271.66, df = 14, p-value < 2.2e-16

forecast(fit, newdata=data.frame(Temperature=c(15,35)))

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 53.57143       218.6592 184.4779 252.8406 166.3039 271.0146
## 53.71429       227.0242 192.6585 261.3898 174.3866 279.6617

#This confirmed my origin supspicion that the relation was most likely not linear and was in fact parabolic. THis suggests my model is incorrect and not sufficient.

autoplot(mens400, xlab="Year", ylab="Winning Time", main="Winning Time for 400 mater but Year")

#There's an overall downtrend as time passes, meaning as time goes on winning time gets faster

menstime<-time(mens400)
tslmmens<-(tslm(mens400~menstime, data=mens400))
tslmmens

## 
## Call:
## tslm(formula = mens400 ~ menstime, data = mens400)
## 
## Coefficients:
## (Intercept)     menstime  
##   172.48148     -0.06457

autoplot(mens400, ylab="Time", xlab="Year",main= "AB Line Fit to Time by Year")+
  geom_abline(slope = tslmmens$coefficients[2],
              intercept = tslmmens$coefficients[1])

#Winning times ares decreasing at .06457 seconds every year.

2.c

checkresiduals(tslmmens$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

# there are a few outliers and a heavy concentration in the middle but I would say the

2.d

mens2020<- forecast(tslmmens,newdata=data.frame(menstime=2020))
mens2020

##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2020       42.04231 40.44975 43.63487 39.55286 44.53176

mens2020$upper

## Time Series:
## Start = 2020 
## End = 2020 
## Frequency = 0.25 
##      Series 1 Series 2
## 2020 43.63487 44.53176

mens2020$lower

## Time Series:
## Start = 2020 
## End = 2020 
## Frequency = 0.25 
##      Series 1 Series 2
## 2020 40.44975 39.55286

#Projected winning is 42.04 with the 80% interval being 40.45 to 43.63 seconds and the 95% interval being 39.55 to 44.53 seconds.

easter(ausbeer)

##      Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00

#Easter, "Returns a vector of 0's and 1's or fractional results if Easter spans March and April in the observed time period. Easter is defined as the days from Good Friday to Easter Sunday inclusively". ausbeer is the, "Total quarterly beer production in Australia". So this returns wether of not its was easter or good firday in first or second quarter during the years we have data for australian beer production.

5.a

autoplot(fancy, xlab= "Monthly Sales", ylab="Year")

head(fancy,30)

##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1987  1664.81  2397.53  2840.71  3547.29  3752.96  3714.74  4349.61  3566.34
## 1988  2499.81  5198.24  7225.14  4806.03  5900.88  4951.34  6179.12  4752.15
## 1989  4717.02  5702.63  9957.58  5304.78  6492.43  6630.80                  
##           Sep      Oct      Nov      Dec
## 1987  5021.82  6423.48  7600.60 19756.21
## 1988  5496.43  5835.10 12600.08 28541.72
## 1989

#This showss a standdard season fluctuation (right around Dec or Christmas is my guess) with huge jumps in 1993 and 1994.

5.b

#seasonal variations require logs since the exponetnial increase with be mitigated.

5.c

tslmfancy<-tslm(BoxCox(fancy,0)~season+trend)
tslmfancy

## 
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ season + trend)
## 
## Coefficients:
## (Intercept)      season2      season3      season4      season5      season6  
##     7.60586      0.25104      0.69521      0.38293      0.40799      0.44696  
##     season7      season8      season9     season10     season11     season12  
##     0.60822      0.58535      0.66634      0.74403      1.20302      1.95814  
##       trend  
##     0.02239

5.d

autoplot(tslmfancy$residuals)

checkresiduals((tslmfancy$residuals))

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

#There doesn't look to be any pattern with the residuals so this suggests a good correlation

Time<-time(fancy)
cbind.data.frame(
    Month = factor(
      month.abb[round(12*(Time - floor(Time)) + 1)],
      labels = month.abb,
      ordered = TRUE
    ),
    Residuals = tslmfancy$residuals
    ) %>%
  ggplot(aes(x = Month,
             y = Residuals)) +
    geom_boxplot()

## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

#The box plots just reveal that in the month of August the model was not as accurate but was fairly consistent otherwise

5.f

tslmfancy$coefficients

## (Intercept)     season2     season3     season4     season5     season6 
##  7.60586042  0.25104367  0.69520658  0.38293405  0.40799437  0.44696253 
##     season7     season8     season9    season10    season11    season12 
##  0.60821562  0.58535238  0.66634464  0.74403359  1.20301639  1.95813657 
##       trend 
##  0.02239298

#The coefficients show a positive trend of increasing sales oover time with every season having positive coefficients. Additionally, this data indicates sales are lower earler in the year then increase

5.g

checkresiduals(tslmfancy)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals from Linear regression model
## LM test = 37.152, df = 17, p-value = 0.003209

# The residuals are correlated since the pvalue is under 0.05.

5.h & i

future_fancy <- rep(0, 36)
for(i in 1:36){
  if(i %% 12 == 3){
    future_fancy[i] <- 1
  }
}
future_fancy <- ts(data = future_fancy,
                   start = 1994,
                   end = c(1996, 12),
                   frequency = 12)
fctslmfancy <- forecast(tslmfancy,newdata = data.frame(Time = time(future_fancy)))
autoplot(fctslmfancy)

fctslmfancy$upper

##               [,1]      [,2]
## Jan 1994  9.771532  9.913525
## Feb 1994 10.044969 10.186962
## Mar 1994 10.511525 10.653518
## Apr 1994 10.221645 10.363638
## May 1994 10.269098 10.411091
## Jun 1994 10.330459 10.472452
## Jul 1994 10.514105 10.656099
## Aug 1994 10.513635 10.655628
## Sep 1994 10.617020 10.759014
## Oct 1994 10.717102 10.859095
## Nov 1994 11.198478 11.340471
## Dec 1994 11.975991 12.117984
## Jan 1995 10.043182 10.186763
## Feb 1995 10.316618 10.460200
## Mar 1995 10.783174 10.926756
## Apr 1995 10.493295 10.636876
## May 1995 10.540748 10.684330
## Jun 1995 10.602109 10.745691
## Jul 1995 10.785755 10.929337
## Aug 1995 10.785285 10.928867
## Sep 1995 10.888670 11.032252
## Oct 1995 10.988752 11.132334
## Nov 1995 11.470128 11.613710
## Dec 1995 12.247641 12.391223
## Jan 1996 10.315440 10.460940
## Feb 1996 10.588877 10.734376
## Mar 1996 11.055433 11.200932
## Apr 1996 10.765553 10.911053
## May 1996 10.813007 10.958506
## Jun 1996 10.874368 11.019867
## Jul 1996 11.058014 11.203513
## Aug 1996 11.057543 11.203043
## Sep 1996 11.160929 11.306428
## Oct 1996 11.261011 11.406510
## Nov 1996 11.742386 11.887886
## Dec 1996 12.519900 12.665399

fctslmfancy$lower

##               [,1]      [,2]
## Jan 1994  9.246995  9.105002
## Feb 1994  9.520432  9.378439
## Mar 1994  9.986988  9.844995
## Apr 1994  9.697108  9.555115
## May 1994  9.744562  9.602569
## Jun 1994  9.805923  9.663930
## Jul 1994  9.989569  9.847576
## Aug 1994  9.989099  9.847106
## Sep 1994 10.092484  9.950491
## Oct 1994 10.192566 10.050573
## Nov 1994 10.673942 10.531949
## Dec 1994 11.451455 11.309462
## Jan 1995  9.512777  9.369195
## Feb 1995  9.786214  9.642632
## Mar 1995 10.252770 10.109188
## Apr 1995  9.962890  9.819308
## May 1995 10.010343  9.866762
## Jun 1995 10.071704  9.928123
## Jul 1995 10.255351 10.111769
## Aug 1995 10.254880 10.111299
## Sep 1995 10.358266 10.214684
## Oct 1995 10.458347 10.314766
## Nov 1995 10.939723 10.796142
## Dec 1995 11.717236 11.573655
## Jan 1996  9.777950  9.632451
## Feb 1996 10.051387  9.905887
## Mar 1996 10.517943 10.372443
## Apr 1996 10.228063 10.082564
## May 1996 10.275516 10.130017
## Jun 1996 10.336878 10.191378
## Jul 1996 10.520524 10.375024
## Aug 1996 10.520053 10.374554
## Sep 1996 10.623439 10.477939
## Oct 1996 10.723521 10.578021
## Nov 1996 11.204896 11.059397
## Dec 1996 11.982409 11.836910

#Intervals can be seen above

5.j

#Find the best way to transform the data since it may not be exponential

6.a

gas2004<-window(gasoline,end=2005)
autoplot(gas2004, ylab = "Gas Supply (Weekly)")

fourier.gas1 <- tslm(gas2004 ~ trend + fourier(gas2004, K=7))
fourier.gas2 <- tslm(gas2004 ~ trend + fourier(gas2004, K=12))
fourier.gas3 <- tslm(gas2004 ~ trend + fourier(gas2004, K=20))

autoplot(gas2004, ylab = "Gas Supply (Weekly)",main= "Fourier Transformation") +  autolayer(fitted(fourier.gas1))+
autolayer(fitted(fourier.gas2))+autolayer(fitted(fourier.gas3))

6.b

CV(fourier.gas1)

##            CV           AIC          AICc           BIC         AdjR2 
##     0.0714734 -1913.5362459 -1912.6718391 -1835.5478956     0.8501073

CV(fourier.gas2)

##            CV           AIC          AICc           BIC         AdjR2 
##  7.096945e-02 -1.919276e+03 -1.917110e+03 -1.795412e+03  8.532618e-01

CV(fourier.gas3)

##            CV           AIC          AICc           BIC         AdjR2 
##  7.223834e-02 -1.908017e+03 -1.902469e+03 -1.710753e+03  8.540588e-01

#With the adjR2 between 3 models close enough I turn to AIC and CV (Akaike's Information Criteria and Coeffeicent Variation). I choose fg1 because the AIC and CV are both in between fg2 and fg3 where they switch highest and lowest for AIC and CV.

6.c

checkresiduals(fourier.gas1$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

# There is indeed correlation of the residuals.

6.d

fcgas <- forecast(fourier.gas1, newdata=data.frame(fourier(gas2004,12,52)))
fcgas

##          Point Forecast    Lo 80    Hi 80    Lo 95     Hi 95
## 2005.014       8.812756 8.469266 9.156246 8.287026  9.338486
## 2005.033       8.666945 8.323371 9.010519 8.141087  9.192803
## 2005.052       8.599822 8.256209 8.943435 8.073903  9.125740
## 2005.071       8.620059 8.276458 8.963660 8.094159  9.145959
## 2005.090       8.701192 8.357636 9.044748 8.175361  9.227023
## 2005.110       8.801076 8.457564 9.144588 8.275312  9.326840
## 2005.129       8.885137 8.541645 9.228630 8.359403  9.410871
## 2005.148       8.940023 8.596528 9.283517 8.414286  9.465760
## 2005.167       8.972165 8.628667 9.315664 8.446422  9.497908
## 2005.186       8.995783 8.652288 9.339277 8.470046  9.521519
## 2005.205       9.020393 8.676906 9.363880 8.494667  9.546119
## 2005.225       9.046109 8.702625 9.389593 8.520389  9.571829
## 2005.244       9.067947 8.724463 9.411430 8.542227  9.593666
## 2005.263       9.083648 8.740164 9.427132 8.557927  9.609369
## 2005.282       9.097734 8.754247 9.441220 8.572009  9.623458
## 2005.301       9.118445 8.774957 9.461934 8.592718  9.644173
## 2005.320       9.150463 8.806975 9.493951 8.624736  9.676190
## 2005.340       9.189916 8.846430 9.533401 8.664192  9.715639
## 2005.359       9.226458 8.882974 9.569941 8.700737  9.752178
## 2005.378       9.251462 8.907979 9.594945 8.725742  9.777181
## 2005.397       9.266112 8.922628 9.609595 8.740391  9.791832
## 2005.416       9.282567 8.939081 9.626052 8.756844  9.808290
## 2005.435       9.316150 8.972663 9.659637 8.790424  9.841875
## 2005.455       9.373402 9.029915 9.716889 8.847676  9.899127
## 2005.474       9.444701 9.101215 9.788186 8.918978  9.970424
## 2005.493       9.507761 9.164277 9.851245 8.982040 10.033481
## 2005.512       9.540941 9.197458 9.884424 9.015222 10.066661
## 2005.531       9.537887 9.194403 9.881370 9.012166 10.063607
## 2005.550       9.513043 9.169558 9.856528 8.987321 10.038766
## 2005.570       9.492990 9.149503 9.836476 8.967265 10.018715
## 2005.589       9.498122 9.154635 9.841608 8.972396 10.023847
## 2005.608       9.526608 9.183122 9.870093 9.000885 10.052331
## 2005.627       9.552267 9.208783 9.895751 9.026546 10.077987
## 2005.646       9.539617 9.196134 9.883100 9.013898 10.065336
## 2005.665       9.467863 9.124380 9.811347 8.942143  9.993583
## 2005.685       9.348603 9.005118 9.692088 8.822881  9.874326
## 2005.704       9.224634 8.881147 9.568121 8.698908  9.750359
## 2005.723       9.148483 8.804995 9.491971 8.622756  9.674209
## 2005.742       9.152341 8.808855 9.495827 8.626617  9.678065
## 2005.761       9.227562 8.884078 9.571046 8.701841  9.753283
## 2005.780       9.327163 8.983680 9.670646 8.801444  9.852883
## 2005.800       9.391261 9.047777 9.734744 8.865541  9.916980
## 2005.819       9.381140 9.037654 9.724625 8.855416  9.906863
## 2005.838       9.301798 8.958309 9.645287 8.776069  9.827526
## 2005.857       9.199081 8.855590 9.542572 8.673349  9.724812
## 2005.876       9.132561 8.789072 9.476049 8.606832  9.658289
## 2005.895       9.139896 8.796410 9.483381 8.614173  9.665619
## 2005.915       9.213689 8.870205 9.557174 8.687968  9.739411
## 2005.934       9.304509 8.961025 9.647993 8.778788  9.830230
## 2005.953       9.348279 9.004790 9.691768 8.822550  9.874008
## 2005.972       9.302031 8.958513 9.645549 8.776259  9.827804
## 2005.991       9.167552 8.823967 9.511136 8.641677  9.693427

gas2005<-window(gasoline,start=2005, end=2006)

autoplot(gas2005, series = "Actual")+ autolayer(fcgas$mean,series = "Forecast", ylab="Gas Supply (Weekly)", main="Actual vs. Forecast of Weekly US Gas Supply")

## Warning: Ignoring unknown parameters: ylab, main

#With the exception of the large dip aprox. 2/3 of the way through 2005 the forecast follows the actual fairly well

7.a

plot(huron)

#Looks like a lot of seasonal changes in the lake level and the magnitude of the changes is increasing suggesting that a multiplicative decomposition may be in order. Also there does look like an overal negative trend of water level to time

7.b

lmhuron<-tslm(huron~trend)

summary(lmhuron)

## 
## Call:
## tslm(formula = huron ~ trend)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.50997 -0.72726  0.00083  0.74402  2.53565 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.202037   0.230111  44.335  < 2e-16 ***
## trend       -0.024201   0.004036  -5.996 3.55e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.13 on 96 degrees of freedom
## Multiple R-squared:  0.2725, Adjusted R-squared:  0.2649 
## F-statistic: 35.95 on 1 and 96 DF,  p-value: 3.545e-08

Time<-time(huron)
t2<-ts(pmax(0,Time-1915),start = 1875)
pwhuron<-tslm(huron~Time+t2)
summary(pwhuron)

## 
## Call:
## tslm(formula = huron ~ Time + t2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49626 -0.66240 -0.07139  0.85163  2.39222 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 132.90870   19.97687   6.653 1.82e-09 ***
## Time         -0.06498    0.01051  -6.181 1.58e-08 ***
## t2            0.06486    0.01563   4.150 7.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared:  0.3841, Adjusted R-squared:  0.3711 
## F-statistic: 29.62 on 2 and 95 DF,  p-value: 1.004e-10

#The piecewise has a higher adjr2 value and lower p value and lower residual standard error suggesting the piecewise function is a more accurate model

7.c

tslm_huron <- tslm(huron ~ trend)

t <- time(huron)
t.break <- 1915
t_piece <- ts(pmax(0,t-t.break), start=1875)
pw_huron <- tslm(huron ~ t + t_piece)
summary(pw_huron)

## 
## Call:
## tslm(formula = huron ~ t + t_piece)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49626 -0.66240 -0.07139  0.85163  2.39222 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 132.90870   19.97687   6.653 1.82e-09 ***
## t            -0.06498    0.01051  -6.181 1.58e-08 ***
## t_piece       0.06486    0.01563   4.150 7.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared:  0.3841, Adjusted R-squared:  0.3711 
## F-statistic: 29.62 on 2 and 95 DF,  p-value: 1.004e-10

h=8
fc_tslm_huron <- forecast(tslm_huron, h=h)

t.new <- t[length(t)] + seq(h)
t_piece_new <- t_piece[length(t_piece)]+seq(h)
newdata <- cbind(t=t.new,
                 t_piece=t_piece_new) %>%
  as.data.frame()
fc_pw_huron <- forecast(pw_huron,newdata = newdata)

autoplot(huron) +
  autolayer(fitted(tslm_huron), series = "Linear") +
  autolayer(fc_tslm_huron, series = "Linear")

autoplot(huron) +
  autolayer(fitted(pw_huron), series = "Piecewise") +
  autolayer(fc_pw_huron, series="Piecewise")

#Chapter 6

#3x5 MA is taking the average for the first five terms and then getting the moving average of each three observation set. Since a 3X5 MA will lead to a 1/15(y + 2y + 3y + 3y + 3y + 2y + y). 7th term would therefore be 1/15 or 0.067.

2.a

plot(plastics)

#Plot shows seasonal trend in plastics sales with a positive upward trend

2.b

decompose_plastics<-decompose(plastics, type= "multiplicative")
plot(decompose_plastics)

2.c

# Looking at the seasonal and trend decomposition our answer does support our interpretation from part a

2.d

plasticseasonal<-decompose_plastics$x/decompose_plastics$seasonal
plot(plasticseasonal)

2.e

plasticoutlier <- plastics
plasticoutlier[42.0] <- plasticoutlier[42.0] + 690
decomposeoutlier <- decompose(plasticoutlier, "multiplicative")
plasticoutlieradj <- decomposeoutlier$x / decomposeoutlier$seasonal
plot(plasticoutlieradj)

#Besides for the point becoming more lit, a spike can now be seen in the data set.

2.f

decompose_plasticsadj<-decompose((plasticoutlieradj), type= "multiplicative")
autoplot(decompose_plasticsadj)

#At the the beginning

plasticoutlier2 <- plastics
plasticoutlier2[1] <- plasticoutlier2[1] + 690
decomposeoutlier2 <- decompose(plasticoutlier2, "multiplicative")
plasticoutlieradj2 <- decomposeoutlier2$x / decomposeoutlier2$seasonal

decompose_plasticsadj2<-decompose((plasticoutlieradj2), type= "multiplicative")
autoplot(decompose_plasticsadj2)

#When the placement of the outlier is altered the pattern in the seasonality of the data is altered and the pattern changed.

library(seasonal)

## Warning: package 'seasonal' was built under R version 4.0.5

retaildata<-readxl::read_excel("~/College Documents/Boston College/retail.xlsx", skip =1)
head(retaildata)

## # A tibble: 6 x 190
##   `Series ID`         A3349335T A3349627V A3349338X A3349398A A3349468W
##   <dttm>                  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1 1982-04-01 00:00:00      303.      41.7      63.9      409.      65.8
## 2 1982-05-01 00:00:00      298.      43.1      64        405.      65.8
## 3 1982-06-01 00:00:00      298       40.3      62.7      401       62.3
## 4 1982-07-01 00:00:00      308.      40.9      65.6      414.      68.2
## 5 1982-08-01 00:00:00      299.      42.1      62.6      404.      66  
## 6 1982-09-01 00:00:00      305.      42        64.4      412.      62.3
## # ... with 184 more variables: A3349336V <dbl>, A3349337W <dbl>,
## #   A3349397X <dbl>, A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>,
## #   A3349790V <dbl>, A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>,
## #   A3349873A <dbl>, A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>,
## #   A3349789K <dbl>, A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>,
## #   A3349799R <dbl>, A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>,
## #   A3349416V <dbl>, A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>,
## #   A3349727C <dbl>, A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>,
## #   A3349349F <dbl>, A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>,
## #   A3349566A <dbl>, A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>,
## #   A3349561R <dbl>, A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>,
## #   A3349637X <dbl>, A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>,
## #   A3349719C <dbl>, A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>,
## #   A3349480L <dbl>, A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>,
## #   A3349481R <dbl>, A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>,
## #   A3349654A <dbl>, A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>,
## #   A3349656F <dbl>, A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>,
## #   A3349360V <dbl>, A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>,
## #   A3349575C <dbl>, A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>,
## #   A3349433W <dbl>, A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>,
## #   A3349815C <dbl>, A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>,
## #   A3349742A <dbl>, A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>,
## #   A3349824F <dbl>, A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>,
## #   A3349434X <dbl>, A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>,
## #   A3349908R <dbl>, A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>,
## #   A3349365F <dbl>, A3349746K <dbl>, ...

retailts <- ts(retaildata[,"A3349627V"],
  frequency=12, start=c(1982,4))

X11decomp<-seas(retailts, x11 = "")

autoplot(X11decomp)

# There is outliers in the data specifically in 1988 and 1989

4.a

#Firtly there is an overal upward trend in the data. Next from the residual graph we can we there in an outlying dip in the data in the early 90s follwed by a flattening of the overall trend.

4.b

#You can see it in the dip of the remainders and the seasonality change in the 90s as well

5.a

plot(cangas)

ggsubseriesplot(cangas)

ggseasonplot(cangas)

#Production is lower in the summer months and in the month of February. This is most likely because everyone has no need for as much gas then

cangasstl<-stl(cangas, s.window = 12)

autoplot(cangasstl)

5.c

cangasts <- ts(cangas,frequency=12, start=c(1960,45))

X11cangas<-seas(cangasts, x11 = "")

autoplot(X11cangas)

#There is more variation with X11 when it comes to seasonality

6.a

brickstl1<-stl(bricksq, s.window = "periodic", t.window = 12)
autoplot(brickstl1)

brickstl2<-stl(bricksq,s.window=40, t.window= 12)
autoplot(brickstl2)

brickstl3<-stl(bricksq,s.window=12)
autoplot(brickstl3)

6.b

bricksea<-seasadj(brickstl1)
autoplot(bricksea, ylab = "Brick Production", xlab = "Time",main = "Seasonal Adjusted Brick Production")

6.c

naivebrick<-naive(bricksea)
autoplot(naivebrick)

brickstlf<-stlf(bricksq,method="naive")
brickstlf_fc<-forecast(brickstlf)
brickstlf_fc

##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 1994 Q4       467.0649 441.7635 492.3662 428.3698 505.7599
## 1995 Q1       426.0748 390.2933 461.8563 371.3517 480.7979
## 1995 Q2       484.0357 440.2125 527.8590 417.0139 551.0576
## 1995 Q3       494.0000 443.3973 544.6027 416.6098 571.3902
## 1995 Q4       467.0649 410.4893 523.6404 380.5400 553.5897
## 1996 Q1       426.0748 364.0994 488.0502 331.2916 520.8580
## 1996 Q2       484.0357 417.0946 550.9768 381.6582 586.4133
## 1996 Q3       494.0000 422.4370 565.5630 384.5538 603.4462

checkresiduals(brickstlf_fc$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

#Although there is slight correlation in residuals on ACF at 4 and 8, it still apears that there isn't enough for there to be an issue with the model

6.f

brickstl<-stl(bricksq,t.window=12, s.window="periodic", robust=TRUE)
brickstl1<-seasadj(brickstl)
brickstl2<-naive(brickstl1)
brickstl_fc<-forecast(brickstl2)
brickstl_fc

##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 1994 Q4       471.7973 442.1625 501.4322 426.4748 517.1199
## 1995 Q1       471.7973 429.8874 513.7073 407.7015 535.8931
## 1995 Q2       471.7973 420.4683 523.1264 393.2963 550.2983
## 1995 Q3       471.7973 412.5277 531.0670 381.1522 562.4425
## 1995 Q4       471.7973 405.5318 538.0628 370.4530 573.1417
## 1996 Q1       471.7973 399.2071 544.3876 360.7802 582.8145
## 1996 Q2       471.7973 393.3909 550.2037 351.8851 591.7096
## 1996 Q3       471.7973 387.9774 555.6173 343.6058 599.9889
## 1996 Q4       471.7973 382.8928 560.7018 335.8296 607.7650
## 1997 Q1       471.7973 378.0838 565.5109 328.4748 615.1199

checkresiduals(brickstl_fc$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

#Results look fairly similar but there appears to be mosr correlation with residuals using robust

autoplot(writing)

hist(writing)

stlfwriting <- stlf(writing, s.window = "periodic", robust = TRUE,method = "naive")

autoplot(stlfwriting)

autoplot(fancy)

hist(fancy)

stlffancy <- stlf(fancy, s.window = "period",lambda = BoxCox.lambda(fancy),method= "rwdrift", robust = TRUE)
autoplot(stlffancy)

HW 1

James Lunga

11/14/2021