Questions
Exercise 1
vic2014 <- vic_elec %>%
slice(35089:36576) %>%
index_by(Date = as_date(Time)) %>%
summarise(
Demand = sum(Demand),
Max_temperature = max(Temperature))
autoplot(vic2014)+
labs(title="Demand for electricity in January 2014")
## Plot variable not specified, automatically selected `.vars = Demand`

vic2014 %>%
ggplot(aes(x = Max_temperature, y = Demand)) +
labs(y = "Demand",
x = "Maximum Temp") +
geom_point() +
geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

#there is a positive relationship because as temp rises, more people want to air condition their homes.
#b
fit <- vic2014 %>%
model(TSLM(Demand ~ Max_temperature)) %>%
report()
## Series: Demand
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -49978.2 -10218.9 -121.3 18533.2 35440.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59083.9 17424.8 3.391 0.00203 **
## Max_temperature 6154.3 601.3 10.235 3.89e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24540 on 29 degrees of freedom
## Multiple R-squared: 0.7832, Adjusted R-squared: 0.7757
## F-statistic: 104.7 on 1 and 29 DF, p-value: 3.8897e-11
fit %>% gg_tsresiduals()

#this is clearly not a normal distribution. I think the problem with this may be our small sample size. There may be a very small trend in our innovation residuals, but I could understand someone disagreeing with me. I dont see any outliers, but the reason why i'm doubtful to say the data has a trend is the jan 26-27 huge decrease, so that may be considered influential. Autocorrelation is really good, within the blue lines.
#c
hitemp <- vic2014 %>%
model(TSLM(Demand~Max_temperature)) %>%
forecast(new_data(vic2014, 1) %>%
mutate(Max_temperature=35))
autoplot(hitemp)+
autolayer(vic2014)
## Plot variable not specified, automatically selected `.vars = Demand`

lowtemp <- vic2014 %>%
model(TSLM(Demand~Max_temperature)) %>%
forecast(new_data(vic2014, 1) %>%
mutate(Max_temperature=15))
autoplot(lowtemp)+
autolayer(vic2014)
## Plot variable not specified, automatically selected `.vars = Demand`

#i think the hitemp is a reasonable forecast because it fits with the trend, however I do not think the low temp is good because it is much lower than all the other observations.
lowtemp %>%
hilo() %>%
select("80%")
## # A tsibble: 1 x 2 [1D]
## `80%` Date
## <hilo> <date>
## 1 [117908.1, 184888.6]80 2014-02-01
hitemp %>%
hilo() %>%
select("80%")
## # A tsibble: 1 x 2 [1D]
## `80%` Date
## <hilo> <date>
## 1 [242088.4, 306880.1]80 2014-02-01
vicag <- vic_elec %>%
index_by(Date = as_date(Time)) %>%
summarise(
Demand = sum(Demand),
Max_temperature = max(Temperature))
ggplot(data=vicag)+
geom_point(mapping = aes(x=Max_temperature, y= Demand))

#it looks like as temperature increases to about 25 C, demand decreases, then rises. I think this shows that my model was pretty accurate.
Exercise 2
#a
autoplot(olympic_running) +
facet_wrap(~Length + Sex, scales = "free")
## Plot variable not specified, automatically selected `.vars = Time`

autoplot(olympic_running) +
facet_wrap(~Length + Sex, scales = "free") +
labs(title = "Running", x = "Year", y = "Time") +
theme(legend.position = "none") +
geom_smooth(method="lm", se=FALSE)
## Plot variable not specified, automatically selected `.vars = Time`
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (stat_smooth).

#b
olympic_lm <- lm(Year ~ ., olympic_running)
olympic_lm
##
## Call:
## lm(formula = Year ~ ., data = olympic_running)
##
## Coefficients:
## (Intercept) Length Sexwomen Time
## 1947.93654 0.05872 34.13151 -0.32515
#c
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
checkresiduals(olympic_lm)

##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals
## LM test = 179.57, df = 10, p-value < 2.2e-16
detach(package:forecast, unload = TRUE)
#d
olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "100") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "100") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "200") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "200") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "400") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "400") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "800") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "800") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "1500") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "1500") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "5000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "5000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "men") %>%
filter(Length == "10000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

olympic_running %>%
filter(Sex == "women") %>%
filter(Length == "10000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(new_data(olympic_running, 1) %>%
mutate(Year = 2020)) %>%
autoplot(olympic_running)

Exercise 3
autoplot(souvenirs)
## Plot variable not specified, automatically selected `.vars = Sales`

#it spikes a lot during Christmas and has increased throughout the years. I don't see anything really unusual.
#b. We take logs to make an exponential model into a linear model.
#c
dumfest <- souvenirs %>% mutate(ifelse(month(Month)==3,1,0))
fitsou2 <- dumfest %>% model(TSLM(log(Sales) ~ trend() + season()))
report(fitsou2)
## Series: Sales
## Model: TSLM
## Transformation: log(Sales)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.416437 -0.126185 0.006075 0.113889 0.385670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6058604 0.0768740 98.939 < 2e-16 ***
## trend() 0.0223930 0.0008448 26.508 < 2e-16 ***
## season()year2 0.2510437 0.0993278 2.527 0.013718 *
## season()year3 0.6952066 0.0993386 6.998 1.18e-09 ***
## season()year4 0.3829341 0.0993565 3.854 0.000252 ***
## season()year5 0.4079944 0.0993817 4.105 0.000106 ***
## season()year6 0.4469625 0.0994140 4.496 2.63e-05 ***
## season()year7 0.6082156 0.0994534 6.116 4.69e-08 ***
## season()year8 0.5853524 0.0995001 5.883 1.21e-07 ***
## season()year9 0.6663446 0.0995538 6.693 4.27e-09 ***
## season()year10 0.7440336 0.0996148 7.469 1.61e-10 ***
## season()year11 1.2030164 0.0996828 12.068 < 2e-16 ***
## season()year12 1.9581366 0.0997579 19.629 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1858 on 71 degrees of freedom
## Multiple R-squared: 0.9527, Adjusted R-squared: 0.9447
## F-statistic: 119.1 on 12 and 71 DF, p-value: < 2.22e-16
augment(fitsou2) %>%
ggplot(aes(x = Month)) +
geom_line(aes(y = Sales, colour = "Data")) +
geom_line(aes(y = .fitted, colour = "Fitted")) +
labs(y="Megalitres",title ="Australian quarterly beer production") +
scale_colour_manual(values = c(Data = "black", Fitted = "#D55E00"))

#e
mdl <- augment(fitsou2, souvenirs$Month)
## Warning: The `type` argument of `augment()` is deprecated as of fabletools 0.2.1.
## The type argument is now deprecated for changes to broom v0.7.0.
## Response residuals are now always found in `.resid` and innovation residuals are now found in `.innov`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
mdl$Month <- month(mdl$Month, label=FALSE)
mdlres <- data.frame(inno=mdl$.innov, month=mdl$Month)
boxplot(inno~month, data=mdlres)

#f
#the values of the coeffients tell us how much the mean of the y variable changes in a one unit change of the x variable.
#g
augment(fitsou2) %>%
features(.innov,ljung_box, lag = 24, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 TSLM(log(Sales) ~ trend() + season()) 102. 1.21e-11
#this tells us if our residuals are significantly different from zero, which it is because the Pvalue is near zero.
#h
#i You could take a box cox to improve the seasonality.
Exercise 4
oldgas <- us_gasoline %>%
slice(1:726)
autoplot(oldgas)
## Plot variable not specified, automatically selected `.vars = Barrels`

foldgas2 <- oldgas %>%
model(TSLM(Barrels~trend()+fourier(K=2)))
report(foldgas2)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9375162 -0.1897569 -0.0006692 0.2058275 1.0016928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+00 2.121e-02 334.493 < 2e-16 ***
## trend() 2.802e-03 5.057e-05 55.420 < 2e-16 ***
## fourier(K = 2)C1_52 -1.237e-01 1.497e-02 -8.265 6.71e-16 ***
## fourier(K = 2)S1_52 -2.383e-01 1.497e-02 -15.917 < 2e-16 ***
## fourier(K = 2)C2_52 4.493e-02 1.495e-02 3.006 0.00274 **
## fourier(K = 2)S2_52 1.054e-02 1.498e-02 0.704 0.48193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 720 degrees of freedom
## Multiple R-squared: 0.8271, Adjusted R-squared: 0.8259
## F-statistic: 688.8 on 5 and 720 DF, p-value: < 2.22e-16
augment(foldgas2) %>%
ggplot(aes(x = Week)) +
geom_line(aes(y = Barrels, colour = "Data")) +
geom_line(aes(y = .fitted, colour = "Fitted"))

foldgas11 <- oldgas %>%
model(TSLM(Barrels~trend()+fourier(K=11)))
report(foldgas11)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.896208 -0.177835 0.003786 0.173355 1.014511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.717 < 2e-16 ***
## trend() 2.799e-03 4.756e-05 58.844 < 2e-16 ***
## fourier(K = 11)C1_52 -1.245e-01 1.407e-02 -8.845 < 2e-16 ***
## fourier(K = 11)S1_52 -2.394e-01 1.408e-02 -17.007 < 2e-16 ***
## fourier(K = 11)C2_52 4.529e-02 1.405e-02 3.223 0.00133 **
## fourier(K = 11)S2_52 9.256e-03 1.408e-02 0.657 0.51125
## fourier(K = 11)C3_52 9.632e-02 1.408e-02 6.842 1.70e-11 ***
## fourier(K = 11)S3_52 3.762e-05 1.405e-02 0.003 0.99787
## fourier(K = 11)C4_52 2.899e-02 1.408e-02 2.060 0.03980 *
## fourier(K = 11)S4_52 2.884e-02 1.406e-02 2.052 0.04054 *
## fourier(K = 11)C5_52 -3.354e-02 1.405e-02 -2.387 0.01725 *
## fourier(K = 11)S5_52 3.172e-02 1.408e-02 2.253 0.02458 *
## fourier(K = 11)C6_52 -6.569e-02 1.406e-02 -4.671 3.59e-06 ***
## fourier(K = 11)S6_52 2.808e-02 1.407e-02 1.996 0.04628 *
## fourier(K = 11)C7_52 -2.235e-02 1.408e-02 -1.588 0.11283
## fourier(K = 11)S7_52 3.274e-02 1.405e-02 2.330 0.02010 *
## fourier(K = 11)C8_52 -1.664e-02 1.406e-02 -1.183 0.23704
## fourier(K = 11)S8_52 -1.428e-03 1.407e-02 -0.102 0.91916
## fourier(K = 11)C9_52 -1.763e-02 1.406e-02 -1.254 0.21019
## fourier(K = 11)S9_52 -5.731e-04 1.407e-02 -0.041 0.96753
## fourier(K = 11)C10_52 1.272e-02 1.407e-02 0.904 0.36621
## fourier(K = 11)S10_52 -2.359e-02 1.406e-02 -1.678 0.09375 .
## fourier(K = 11)C11_52 -2.837e-02 1.407e-02 -2.016 0.04414 *
## fourier(K = 11)S11_52 2.555e-02 1.406e-02 1.817 0.06962 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.268 on 702 degrees of freedom
## Multiple R-squared: 0.851, Adjusted R-squared: 0.8461
## F-statistic: 174.3 on 23 and 702 DF, p-value: < 2.22e-16
augment(foldgas11) %>%
ggplot(aes(x = Week)) +
geom_line(aes(y = Barrels, colour = "Data")) +
geom_line(aes(y = .fitted, colour = "Fitted"))

foldgas26 <- oldgas %>%
model(TSLM(Barrels~trend()+fourier(K=26)))
report(foldgas26)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.834099 -0.172836 0.001292 0.167365 1.034800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.756 < 2e-16 ***
## trend() 2.799e-03 4.755e-05 58.858 < 2e-16 ***
## fourier(K = 26)C1_52 -1.244e-01 1.407e-02 -8.843 < 2e-16 ***
## fourier(K = 26)S1_52 -2.394e-01 1.408e-02 -17.004 < 2e-16 ***
## fourier(K = 26)C2_52 4.527e-02 1.405e-02 3.222 0.00134 **
## fourier(K = 26)S2_52 9.340e-03 1.408e-02 0.663 0.50740
## fourier(K = 26)C3_52 9.624e-02 1.408e-02 6.837 1.81e-11 ***
## fourier(K = 26)S3_52 7.369e-05 1.405e-02 0.005 0.99582
## fourier(K = 26)C4_52 2.892e-02 1.407e-02 2.055 0.04031 *
## fourier(K = 26)S4_52 2.880e-02 1.405e-02 2.050 0.04079 *
## fourier(K = 26)C5_52 -3.356e-02 1.405e-02 -2.389 0.01719 *
## fourier(K = 26)S5_52 3.164e-02 1.408e-02 2.247 0.02494 *
## fourier(K = 26)C6_52 -6.564e-02 1.406e-02 -4.668 3.67e-06 ***
## fourier(K = 26)S6_52 2.802e-02 1.407e-02 1.992 0.04676 *
## fourier(K = 26)C7_52 -2.227e-02 1.408e-02 -1.582 0.11409
## fourier(K = 26)S7_52 3.274e-02 1.405e-02 2.330 0.02009 *
## fourier(K = 26)C8_52 -1.659e-02 1.406e-02 -1.180 0.23839
## fourier(K = 26)S8_52 -1.368e-03 1.407e-02 -0.097 0.92254
## fourier(K = 26)C9_52 -1.765e-02 1.406e-02 -1.255 0.20977
## fourier(K = 26)S9_52 -4.998e-04 1.407e-02 -0.036 0.97168
## fourier(K = 26)C10_52 1.266e-02 1.407e-02 0.900 0.36859
## fourier(K = 26)S10_52 -2.356e-02 1.406e-02 -1.676 0.09419 .
## fourier(K = 26)C11_52 -2.843e-02 1.407e-02 -2.021 0.04366 *
## fourier(K = 26)S11_52 2.553e-02 1.406e-02 1.815 0.06991 .
## fourier(K = 26)C12_52 -1.181e-02 1.406e-02 -0.840 0.40118
## fourier(K = 26)S12_52 -2.505e-02 1.407e-02 -1.781 0.07539 .
## fourier(K = 26)C13_52 6.166e-03 1.406e-02 0.438 0.66119
## fourier(K = 26)S13_52 1.409e-02 1.406e-02 1.002 0.31687
## fourier(K = 26)C14_52 -3.056e-03 1.406e-02 -0.217 0.82802
## fourier(K = 26)S14_52 1.800e-02 1.407e-02 1.279 0.20120
## fourier(K = 26)C15_52 -1.247e-03 1.407e-02 -0.089 0.92937
## fourier(K = 26)S15_52 1.735e-02 1.406e-02 1.234 0.21766
## fourier(K = 26)C16_52 -1.207e-02 1.407e-02 -0.858 0.39117
## fourier(K = 26)S16_52 -1.492e-03 1.406e-02 -0.106 0.91550
## fourier(K = 26)C17_52 7.686e-03 1.406e-02 0.547 0.58466
## fourier(K = 26)S17_52 -2.178e-02 1.407e-02 -1.548 0.12210
## fourier(K = 26)C18_52 2.236e-03 1.406e-02 0.159 0.87369
## fourier(K = 26)S18_52 1.775e-02 1.407e-02 1.262 0.20739
## fourier(K = 26)C19_52 -2.301e-03 1.408e-02 -0.163 0.87021
## fourier(K = 26)S19_52 -6.677e-05 1.405e-02 -0.005 0.99621
## fourier(K = 26)C20_52 -2.977e-03 1.406e-02 -0.212 0.83240
## fourier(K = 26)S20_52 6.098e-04 1.407e-02 0.043 0.96543
## fourier(K = 26)C21_52 6.089e-03 1.405e-02 0.433 0.66483
## fourier(K = 26)S21_52 3.927e-03 1.408e-02 0.279 0.78039
## fourier(K = 26)C22_52 -8.038e-04 1.407e-02 -0.057 0.95447
## fourier(K = 26)S22_52 -1.126e-02 1.405e-02 -0.801 0.42323
## fourier(K = 26)C23_52 -2.159e-02 1.408e-02 -1.534 0.12551
## fourier(K = 26)S23_52 1.192e-02 1.405e-02 0.848 0.39656
## fourier(K = 26)C24_52 7.003e-04 1.405e-02 0.050 0.96025
## fourier(K = 26)S24_52 1.837e-02 1.408e-02 1.305 0.19241
## fourier(K = 26)C25_52 4.907e-03 1.406e-02 0.349 0.72717
## fourier(K = 26)S25_52 7.445e-03 1.407e-02 0.529 0.59686
## fourier(K = 26)C26_52 -3.084e-02 9.944e-03 -3.101 0.00201 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2679 on 673 degrees of freedom
## Multiple R-squared: 0.8572, Adjusted R-squared: 0.8461
## F-statistic: 77.67 on 52 and 673 DF, p-value: < 2.22e-16
augment(foldgas26) %>%
ggplot(aes(x = Week)) +
geom_line(aes(y = Barrels, colour = "Data")) +
geom_line(aes(y = .fitted, colour = "Fitted"))

fit <- oldgas %>%
model(K1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
K2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
K3 = TSLM(Barrels ~ trend() + fourier(K = 3)),
K4 = TSLM(Barrels ~ trend() + fourier(K = 4)),
K5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
K6 = TSLM(Barrels ~ trend() + fourier(K = 6)),
K7 = TSLM(Barrels ~trend()+ fourier(K=7)),
K8 = TSLM(Barrels ~trend()+ fourier(K=8)),
K9 = TSLM(Barrels ~trend()+ fourier(K=9)),
K10 = TSLM(Barrels ~trend()+ fourier(K=10)),
K11 = TSLM(Barrels ~trend()+ fourier(K=11)),
K20=TSLM(Barrels ~trend()+ fourier(K=20)),
K26=TSLM(Barrels ~trend()+ fourier(K=26)))
glance(fit) %>% select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 13 × 4
## .model r_squared adj_r_squared AICc
## <chr> <dbl> <dbl> <dbl>
## 1 K1 0.825 0.824 -1809.
## 2 K2 0.827 0.826 -1814.
## 3 K3 0.837 0.835 -1853.
## 4 K4 0.839 0.837 -1856.
## 5 K5 0.841 0.838 -1862.
## 6 K6 0.846 0.844 -1884.
## 7 K7 0.848 0.845 -1887.
## 8 K8 0.848 0.845 -1885.
## 9 K9 0.849 0.845 -1882.
## 10 K10 0.849 0.845 -1881.
## 11 K11 0.851 0.846 -1885.
## 12 K20 0.854 0.845 -1859.
## 13 K26 0.857 0.846 -1851.
#clearly k= 11 is the best choice because it has the lowest AICC.
#c
foldgas11 %>%
gg_tsresiduals()

augment(foldgas11) %>%
features(.innov, ljung_box, lag = 104, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 TSLM(Barrels ~ trend() + fourier(K = 11)) 149. 0.00268
#d
augfoldgas11 <- augment(foldgas11)
autoplot(augfoldgas11) +
autolayer(us_gasoline)
## Plot variable not specified, automatically selected `.vars = Barrels`
## Plot variable not specified, automatically selected `.vars = Barrels`

newgas <- us_gasoline %>% slice(727:1355)
autoplot(augfoldgas11)
## Plot variable not specified, automatically selected `.vars = Barrels`

Exercise 5
afgan <- global_economy %>%
filter(Country=="Afghanistan") %>%
select(Year, Population)
autoplot(afgan)
## Plot variable not specified, automatically selected `.vars = Population`

#in the 1980's there is a clear dip because of the Soviet Afghan war.
#b
aftrends <- afgan %>%
model(
Linear=TSLM(Population~trend()),
piecewise=TSLM(Population~trend(knots=c(1980,1989)))
)
afgan %>%
autoplot(Population) +
geom_line(data = fitted(aftrends),
aes(y = .fitted, colour = .model))

#clearly the piecewise function does much better.
afganfc <- aftrends %>%
forecast(h=5)
afgan %>%
autoplot(Population) +
geom_line(data = fitted(aftrends),
aes(x=Year, y = .fitted, colour = .model))+
autolayer(afganfc)

#yeah the piecewise is much better. keeps with the true data much better
Exercise 6
- assume model is reasonable approximation to reality, and that it is
linear. 2. error term has mean of zero. 3. error terms arent
autocorrelated. 4. error terms are unrelated to predictor variables. 5.
erroe terms are normally distributed.6. predictors arent random
variables. 6. no multicolinearity. 7. homoscedastic. Unbiased estimator
means that your estimation of B is equal to the actual value of B. A
consistant estimator means that as the number of observations apporaches
infinity, the estimator approaches the true value. R^2 tells you how
much of the variation in your y variable is explained by your model.
Adjusted R^2 takes degrees of freedom into account when doing this
calculation.