Hw #4

Questions

Exercise 1

Elec <- vic_elec

Shock <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(Demand = sum(Demand), Temperature = max(Temperature))

Shock %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand")

Shock %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand") +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Shock %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand") +
  geom_smooth(method = "lm", se = FALSE)

## `geom_smooth()` using formula 'y ~ x'

Shock %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE) +
  labs(title = "Electricity Demand")

## `geom_smooth()` using formula 'y ~ x'

goodshit <- Shock %>% model(TSLM(Demand ~ Temperature))
goodshit %>%
  gg_tsresiduals()

gang15 <- Shock %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(new_data(Shock, 1) %>%
  mutate(Temperature = 15)) %>%
  autoplot(Shock)

gang15

gang35 <- Shock %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(new_data(Shock, 1) %>%
      mutate(Temperature = 35)) %>%
  autoplot(Shock)
gang35

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Registered S3 methods overwritten by 'forecast':
##   method                 from     
##   autoplot.Arima         ggfortify
##   autoplot.acf           ggfortify
##   autoplot.ar            ggfortify
##   autoplot.bats          ggfortify
##   autoplot.decomposed.ts ggfortify
##   autoplot.ets           ggfortify
##   autoplot.forecast      ggfortify
##   autoplot.stl           ggfortify
##   autoplot.ts            ggfortify
##   fitted.ar              ggfortify
##   fortify.ts             ggfortify
##   residuals.ar           ggfortify

## 
## Attaching package: 'forecast'

## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

uhoh <- lm(Demand ~ Temperature, data = Shock)

forecast(uhoh, newdata = data.frame(Temperature=c(15,35)))

##           Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 151398.35       151398.4 117127.2 185669.5  97951.22 204845.5
## 274484.25       274484.2 241333.0 307635.5 222783.69 326184.8

detach(package:forecast, unload = TRUE)

plot(Demand~Temperature, data = vic_elec)

As the temperature increases, the demand for electricity also increases. This is due to a higher demand for air conditioning, fans, and other methods of temp control. All of these things consume a lot of electricity.

Yes the model is adiquet. There are two outliers near Jan 27. This can also be shown on the histogram (on the far left) as the left tail spikes it skews the data to the right a bit.

The 35 degree forcast works well, however the 15 degree forcast is bad. It is very low even compared to the lowest temps.

This shows that when temps are very low, people demand electricity. When temps are very high people demand electricity even more.

Exercise 2

autoplot(olympic_running) +
  facet_wrap(~Length + Sex, scales = "free") +
  labs(title = "Running", x = "Year", y = "Time") +
  theme(legend.position = "none")

## Plot variable not specified, automatically selected `.vars = Time`

autoplot(olympic_running) +
  facet_wrap(~Length + Sex, scales = "free") +
  labs(title = "Running", x = "Year", y = "Time") +
  theme(legend.position = "none") +
  geom_smooth(method="lm", se=FALSE)

## Plot variable not specified, automatically selected `.vars = Time`
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 31 rows containing non-finite values (stat_smooth).

olympic_lm <- lm(Year ~ ., olympic_running)
olympic_lm

## 
## Call:
## lm(formula = Year ~ ., data = olympic_running)
## 
## Coefficients:
## (Intercept)       Length     Sexwomen         Time  
##  1947.93654      0.05872     34.13151     -0.32515

library(forecast)

## 
## Attaching package: 'forecast'
## 
## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

checkresiduals(olympic_lm)

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals
## LM test = 179.57, df = 10, p-value < 2.2e-16

detach(package:forecast, unload = TRUE)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "100") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 100)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "100") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 100)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "200") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 200)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "200") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 200)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "400") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 400)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "400") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 400)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "800") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 800)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "800") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 800)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "1500") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 1500)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "1500") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 1500)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "5000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 5000)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "5000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 5000)")

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "10000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Men's 10000)")

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "10000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running) +
  labs(title = "Forecasted Winning Time (Women's 10000)")

For all lengths of races, the trend seems to be trending downward. This is likely due to people becoming better and faster at running over time.

the time is decreasing by 0.32515 seconds per year

The assumption made is that people are going to continue to get better at running, this isn’t true. There will be years where times may go up. (Due to someone great retiring or something like that)

Exercise 3

plot(souvenirs)

autoplot(souvenirs) + 
  labs(title = "Time Series of Australian Souvenirs")

## Plot variable not specified, automatically selected `.vars = Sales`

souvnum <- souvenirs
souvsales <- as.numeric(souvenirs$Sales)
hist(souvsales)

souvlog <- souvnum
souvlogSales <- log(souvnum$Sales)
hist(souvlogSales)

souv <- ts(souvenirs$Sales,start=c(1987,1), end=c(1993,6), frequency=12)
souvlog2 <- log(souv)
dummysurffest <- rep(0, length(souv))
dummysurffest[seq_along(dummysurffest)%%12 == 3] = 1
dummysurffest[3] = 0
dummysurffest <- ts(dummysurffest, freq = 12, start=c(1987,1))
Bothofem <- data.frame(souvlog2, dummysurffest)

library(forecast)

## 
## Attaching package: 'forecast'
## 
## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

fitty <- tslm(souvlog2 ~ trend + season + dummysurffest, data = Bothofem)

forecastmaybe <- data.frame(dummysurffest = rep(0, 12))
forecastmaybe[3,] = 1
forecast(fitty, newdata = forecastmaybe)

##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Jul 1993       9.883136  9.637329 10.128943  9.503919 10.262353
## Aug 1993       9.867772  9.621965 10.113579  9.488555 10.246989
## Sep 1993      10.531942 10.191069 10.872815 10.006062 11.057822
## Oct 1993      10.092605  9.846798 10.338412  9.713388 10.471822
## Nov 1993      10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993      11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994       9.430933  9.186093  9.675774  9.053207  9.808659
## Feb 1994       9.704370  9.459530  9.949210  9.326644 10.082096
## Mar 1994       9.695742  9.365756 10.025727  9.186658 10.204825
## Apr 1994       9.881046  9.636206 10.125887  9.503320 10.258772
## May 1994       9.928500  9.683659 10.173340  9.550774 10.306226
## Jun 1994       9.989861  9.745020 10.234701  9.612135 10.367587

plot(fitty$residuals)

boxplot(residuals(fitty)~cycle(residuals(fitty)))

fitty$coefficients

##   (Intercept)         trend       season2       season3       season4 
##     7.6662400     0.0207611     0.2526756     0.2232860     0.3878297 
##       season5       season6       season7       season8       season9 
##     0.4145219     0.4551219     0.5767690     0.5406444     0.6296713 
##      season10      season11      season12 dummysurffest 
##     0.7239552     1.1957774     1.9473841     0.5543818

checkresiduals(fitty)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals from Linear regression model
## LM test = 31.535, df = 17, p-value = 0.01718

forecastcoolio <- data.frame(dummysurffest = rep(0, 36))
predictions <- forecast(fitty, newdata = forecastcoolio)
predictions

##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Jul 1993       9.883136  9.637329 10.128943  9.503919 10.262353
## Aug 1993       9.867772  9.621965 10.113579  9.488555 10.246989
## Sep 1993       9.977560  9.731753 10.223367  9.598343 10.356777
## Oct 1993      10.092605  9.846798 10.338412  9.713388 10.471822
## Nov 1993      10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993      11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994       9.430933  9.186093  9.675774  9.053207  9.808659
## Feb 1994       9.704370  9.459530  9.949210  9.326644 10.082096
## Mar 1994       9.695742  9.365756 10.025727  9.186658 10.204825
## Apr 1994       9.881046  9.636206 10.125887  9.503320 10.258772
## May 1994       9.928500  9.683659 10.173340  9.550774 10.306226
## Jun 1994       9.989861  9.745020 10.234701  9.612135 10.367587
## Jul 1994      10.132269  9.883394 10.381144  9.748319 10.516219
## Aug 1994      10.116905  9.868031 10.365780  9.732955 10.500856
## Sep 1994      10.226693  9.977819 10.475568  9.842743 10.610644
## Oct 1994      10.341738 10.092864 10.590613  9.957788 10.725689
## Nov 1994      10.834322 10.585447 11.083197 10.450372 11.218272
## Dec 1994      11.606690 11.357815 11.855564 11.222739 11.990640
## Jan 1995       9.680067  9.431764  9.928369  9.296999 10.063134
## Feb 1995       9.953503  9.705201 10.201806  9.570436 10.336570
## Mar 1995       9.944875  9.610605 10.279144  9.429182 10.460567
## Apr 1995      10.130180  9.881877 10.378482  9.747112 10.513247
## May 1995      10.177633  9.929330 10.425935  9.794566 10.560700
## Jun 1995      10.238994  9.990691 10.487296  9.855927 10.622061
## Jul 1995      10.381402 10.128745 10.634059  9.991617 10.771188
## Aug 1995      10.366039 10.113381 10.618696  9.976253 10.755824
## Sep 1995      10.475827 10.223169 10.728484 10.086041 10.865612
## Oct 1995      10.590872 10.338214 10.843529 10.201086 10.980657
## Nov 1995      11.083455 10.830798 11.336112 10.693669 11.473240
## Dec 1995      11.855823 11.603165 12.108480 11.466037 12.245608
## Jan 1996       9.929200  9.676730 10.181669  9.539704 10.318696
## Feb 1996      10.202636  9.950167 10.455106  9.813141 10.592132
## Mar 1996      10.194008  9.854949 10.533067  9.670926 10.717090
## Apr 1996      10.379313 10.126843 10.631782  9.989817 10.768809
## May 1996      10.426766 10.174296 10.679236 10.037270 10.816262
## Jun 1996      10.488127 10.235658 10.740597 10.098631 10.877623

autoplot(predictions)

there is a huge spike in December of each year. This is probably due to it being summer during that time. (more people visiting) There is also a slight dip during 1991, this is likely due to a small recession. Overall, there is an upward trend in the data.

The very large spikes in December skew the data to not be normally distributed. When you take the log, it transforms the data to fit it into a normal distribution.

The residuals don’t show much or any trend or seasonality. This means that the residuals are white noise

These show the correlations with the variables. Seasons 11 and 12 have the highest impact on souvenir sales.

The residuals have a relatively normal distribution and there is a significant p-value.

The ljung-Box tells you if your residuals are statistically different from white noise. The lower, the more they don’t resemble white noise.

I feel like a BoxCox transformation could help fix some of the homoscedasticity in the model and help smooth out the variance of the residuals.

Exercise 4

us_gas <- us_gasoline %>%
  filter(year(Week) < "2005")

fourier_gas1 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 1)))
report(fourier_gas1)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.969489 -0.197166 -0.002252  0.200869  0.975792 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.092e+00  2.131e-02 332.782  < 2e-16 ***
## trend()              2.807e-03  5.081e-05  55.237  < 2e-16 ***
## fourier(K = 1)C1_52 -1.238e-01  1.505e-02  -8.226 9.01e-16 ***
## fourier(K = 1)S1_52 -2.383e-01  1.505e-02 -15.832  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2865 on 722 degrees of freedom
## Multiple R-squared: 0.8248,  Adjusted R-squared: 0.8241
## F-statistic:  1133 on 3 and 722 DF, p-value: < 2.22e-16

augment(fourier_gas1) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

detach(package:forecast, unload = TRUE)
fourier_gas1 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas2 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 2)))
report(fourier_gas2)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.9375162 -0.1897569 -0.0006692  0.2058275  1.0016928 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.094e+00  2.121e-02 334.493  < 2e-16 ***
## trend()              2.802e-03  5.057e-05  55.420  < 2e-16 ***
## fourier(K = 2)C1_52 -1.237e-01  1.497e-02  -8.265 6.71e-16 ***
## fourier(K = 2)S1_52 -2.383e-01  1.497e-02 -15.917  < 2e-16 ***
## fourier(K = 2)C2_52  4.493e-02  1.495e-02   3.006  0.00274 ** 
## fourier(K = 2)S2_52  1.054e-02  1.498e-02   0.704  0.48193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.285 on 720 degrees of freedom
## Multiple R-squared: 0.8271,  Adjusted R-squared: 0.8259
## F-statistic: 688.8 on 5 and 720 DF, p-value: < 2.22e-16

augment(fourier_gas2) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas2 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas5 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 5)))
report(fourier_gas5)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.8568361 -0.1759670 -0.0002609  0.1916623  0.9380241 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.095e+00  2.043e-02 347.198  < 2e-16 ***
## trend()              2.798e-03  4.873e-05  57.428  < 2e-16 ***
## fourier(K = 5)C1_52 -1.242e-01  1.442e-02  -8.610  < 2e-16 ***
## fourier(K = 5)S1_52 -2.390e-01  1.442e-02 -16.570  < 2e-16 ***
## fourier(K = 5)C2_52  4.517e-02  1.440e-02   3.137  0.00178 ** 
## fourier(K = 5)S2_52  9.760e-03  1.443e-02   0.676  0.49898    
## fourier(K = 5)C3_52  9.586e-02  1.442e-02   6.646    6e-11 ***
## fourier(K = 5)S3_52  2.543e-04  1.440e-02   0.018  0.98592    
## fourier(K = 5)C4_52  2.854e-02  1.442e-02   1.979  0.04821 *  
## fourier(K = 5)S4_52  2.861e-02  1.440e-02   1.987  0.04733 *  
## fourier(K = 5)C5_52 -3.364e-02  1.440e-02  -2.336  0.01974 *  
## fourier(K = 5)S5_52  3.123e-02  1.443e-02   2.165  0.03073 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2746 on 714 degrees of freedom
## Multiple R-squared: 0.8409,  Adjusted R-squared: 0.8384
## F-statistic:   343 on 11 and 714 DF, p-value: < 2.22e-16

augment(fourier_gas5) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas5 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas10 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 10)))
report(fourier_gas10)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.8625657 -0.1755251  0.0003646  0.1817594  0.9806206 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.094e+00  2.002e-02 354.363  < 2e-16 ***
## trend()                2.799e-03  4.774e-05  58.625  < 2e-16 ***
## fourier(K = 10)C1_52  -1.245e-01  1.413e-02  -8.814  < 2e-16 ***
## fourier(K = 10)S1_52  -2.395e-01  1.413e-02 -16.945  < 2e-16 ***
## fourier(K = 10)C2_52   4.529e-02  1.411e-02   3.210  0.00139 ** 
## fourier(K = 10)S2_52   9.203e-03  1.414e-02   0.651  0.51524    
## fourier(K = 10)C3_52   9.636e-02  1.413e-02   6.819 1.98e-11 ***
## fourier(K = 10)S3_52  -4.035e-06  1.411e-02   0.000  0.99977    
## fourier(K = 10)C4_52   2.905e-02  1.413e-02   2.056  0.04015 *  
## fourier(K = 10)S4_52   2.884e-02  1.411e-02   2.044  0.04134 *  
## fourier(K = 10)C5_52  -3.349e-02  1.410e-02  -2.375  0.01783 *  
## fourier(K = 10)S5_52   3.176e-02  1.413e-02   2.247  0.02494 *  
## fourier(K = 10)C6_52  -6.569e-02  1.412e-02  -4.653 3.91e-06 ***
## fourier(K = 10)S6_52   2.815e-02  1.412e-02   1.993  0.04660 *  
## fourier(K = 10)C7_52  -2.240e-02  1.413e-02  -1.585  0.11340    
## fourier(K = 10)S7_52   3.279e-02  1.411e-02   2.324  0.02038 *  
## fourier(K = 10)C8_52  -1.671e-02  1.411e-02  -1.184  0.23676    
## fourier(K = 10)S8_52  -1.432e-03  1.412e-02  -0.101  0.91926    
## fourier(K = 10)C9_52  -1.768e-02  1.411e-02  -1.253  0.21066    
## fourier(K = 10)S9_52  -6.335e-04  1.413e-02  -0.045  0.96424    
## fourier(K = 10)C10_52  1.274e-02  1.412e-02   0.902  0.36751    
## fourier(K = 10)S10_52 -2.368e-02  1.411e-02  -1.678  0.09387 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.269 on 704 degrees of freedom
## Multiple R-squared: 0.8494,  Adjusted R-squared: 0.8449
## F-statistic: 189.1 on 21 and 704 DF, p-value: < 2.22e-16

augment(fourier_gas10) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas10 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas20 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 20)))
report(fourier_gas20)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.8455538 -0.1703796  0.0009278  0.1724455  0.9995200 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.095e+00  2.001e-02 354.550  < 2e-16 ***
## trend()                2.798e-03  4.772e-05  58.649  < 2e-16 ***
## fourier(K = 20)C1_52  -1.245e-01  1.412e-02  -8.814  < 2e-16 ***
## fourier(K = 20)S1_52  -2.394e-01  1.413e-02 -16.946  < 2e-16 ***
## fourier(K = 20)C2_52   4.525e-02  1.410e-02   3.209  0.00139 ** 
## fourier(K = 20)S2_52   9.317e-03  1.413e-02   0.659  0.50988    
## fourier(K = 20)C3_52   9.624e-02  1.413e-02   6.814 2.09e-11 ***
## fourier(K = 20)S3_52   3.202e-05  1.410e-02   0.002  0.99819    
## fourier(K = 20)C4_52   2.895e-02  1.412e-02   2.050  0.04072 *  
## fourier(K = 20)S4_52   2.877e-02  1.410e-02   2.040  0.04175 *  
## fourier(K = 20)C5_52  -3.349e-02  1.410e-02  -2.376  0.01779 *  
## fourier(K = 20)S5_52   3.165e-02  1.413e-02   2.240  0.02541 *  
## fourier(K = 20)C6_52  -6.559e-02  1.411e-02  -4.649 4.01e-06 ***
## fourier(K = 20)S6_52   2.808e-02  1.411e-02   1.990  0.04701 *  
## fourier(K = 20)C7_52  -2.229e-02  1.413e-02  -1.578  0.11498    
## fourier(K = 20)S7_52   3.283e-02  1.410e-02   2.329  0.02018 *  
## fourier(K = 20)C8_52  -1.668e-02  1.411e-02  -1.183  0.23735    
## fourier(K = 20)S8_52  -1.323e-03  1.412e-02  -0.094  0.92536    
## fourier(K = 20)C9_52  -1.775e-02  1.410e-02  -1.259  0.20862    
## fourier(K = 20)S9_52  -5.484e-04  1.412e-02  -0.039  0.96903    
## fourier(K = 20)C10_52  1.263e-02  1.412e-02   0.894  0.37140    
## fourier(K = 20)S10_52 -2.368e-02  1.411e-02  -1.679  0.09360 .  
## fourier(K = 20)C11_52 -2.835e-02  1.411e-02  -2.008  0.04500 *  
## fourier(K = 20)S11_52  2.542e-02  1.411e-02   1.801  0.07208 .  
## fourier(K = 20)C12_52 -1.166e-02  1.411e-02  -0.827  0.40869    
## fourier(K = 20)S12_52 -2.505e-02  1.411e-02  -1.775  0.07633 .  
## fourier(K = 20)C13_52  6.269e-03  1.411e-02   0.444  0.65703    
## fourier(K = 20)S13_52  1.421e-02  1.411e-02   1.007  0.31442    
## fourier(K = 20)C14_52 -3.088e-03  1.411e-02  -0.219  0.82685    
## fourier(K = 20)S14_52  1.816e-02  1.411e-02   1.287  0.19865    
## fourier(K = 20)C15_52 -1.403e-03  1.411e-02  -0.099  0.92086    
## fourier(K = 20)S15_52  1.743e-02  1.411e-02   1.235  0.21708    
## fourier(K = 20)C16_52 -1.224e-02  1.412e-02  -0.867  0.38624    
## fourier(K = 20)S16_52 -1.567e-03  1.411e-02  -0.111  0.91160    
## fourier(K = 20)C17_52  7.639e-03  1.410e-02   0.542  0.58827    
## fourier(K = 20)S17_52 -2.197e-02  1.412e-02  -1.556  0.12021    
## fourier(K = 20)C18_52  2.355e-03  1.411e-02   0.167  0.86749    
## fourier(K = 20)S18_52  1.760e-02  1.412e-02   1.247  0.21299    
## fourier(K = 20)C19_52 -2.098e-03  1.412e-02  -0.149  0.88197    
## fourier(K = 20)S19_52 -7.173e-05  1.410e-02  -0.005  0.99594    
## fourier(K = 20)C20_52 -2.844e-03  1.411e-02  -0.202  0.84034    
## fourier(K = 20)S20_52  7.698e-04  1.411e-02   0.055  0.95652    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2688 on 684 degrees of freedom
## Multiple R-squared: 0.8538,  Adjusted R-squared: 0.8451
## F-statistic: 97.46 on 41 and 684 DF, p-value: < 2.22e-16

augment(fourier_gas20) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas20 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas26 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 26)))
report(fourier_gas26)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.834099 -0.172836  0.001292  0.167365  1.034800 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.095e+00  1.994e-02 355.756  < 2e-16 ***
## trend()                2.799e-03  4.755e-05  58.858  < 2e-16 ***
## fourier(K = 26)C1_52  -1.244e-01  1.407e-02  -8.843  < 2e-16 ***
## fourier(K = 26)S1_52  -2.394e-01  1.408e-02 -17.004  < 2e-16 ***
## fourier(K = 26)C2_52   4.527e-02  1.405e-02   3.222  0.00134 ** 
## fourier(K = 26)S2_52   9.340e-03  1.408e-02   0.663  0.50740    
## fourier(K = 26)C3_52   9.624e-02  1.408e-02   6.837 1.81e-11 ***
## fourier(K = 26)S3_52   7.369e-05  1.405e-02   0.005  0.99582    
## fourier(K = 26)C4_52   2.892e-02  1.407e-02   2.055  0.04031 *  
## fourier(K = 26)S4_52   2.880e-02  1.405e-02   2.050  0.04079 *  
## fourier(K = 26)C5_52  -3.356e-02  1.405e-02  -2.389  0.01719 *  
## fourier(K = 26)S5_52   3.164e-02  1.408e-02   2.247  0.02494 *  
## fourier(K = 26)C6_52  -6.564e-02  1.406e-02  -4.668 3.67e-06 ***
## fourier(K = 26)S6_52   2.802e-02  1.407e-02   1.992  0.04676 *  
## fourier(K = 26)C7_52  -2.227e-02  1.408e-02  -1.582  0.11409    
## fourier(K = 26)S7_52   3.274e-02  1.405e-02   2.330  0.02009 *  
## fourier(K = 26)C8_52  -1.659e-02  1.406e-02  -1.180  0.23839    
## fourier(K = 26)S8_52  -1.368e-03  1.407e-02  -0.097  0.92254    
## fourier(K = 26)C9_52  -1.765e-02  1.406e-02  -1.255  0.20977    
## fourier(K = 26)S9_52  -4.998e-04  1.407e-02  -0.036  0.97168    
## fourier(K = 26)C10_52  1.266e-02  1.407e-02   0.900  0.36859    
## fourier(K = 26)S10_52 -2.356e-02  1.406e-02  -1.676  0.09419 .  
## fourier(K = 26)C11_52 -2.843e-02  1.407e-02  -2.021  0.04366 *  
## fourier(K = 26)S11_52  2.553e-02  1.406e-02   1.815  0.06991 .  
## fourier(K = 26)C12_52 -1.181e-02  1.406e-02  -0.840  0.40118    
## fourier(K = 26)S12_52 -2.505e-02  1.407e-02  -1.781  0.07539 .  
## fourier(K = 26)C13_52  6.166e-03  1.406e-02   0.438  0.66119    
## fourier(K = 26)S13_52  1.409e-02  1.406e-02   1.002  0.31687    
## fourier(K = 26)C14_52 -3.056e-03  1.406e-02  -0.217  0.82802    
## fourier(K = 26)S14_52  1.800e-02  1.407e-02   1.279  0.20120    
## fourier(K = 26)C15_52 -1.247e-03  1.407e-02  -0.089  0.92937    
## fourier(K = 26)S15_52  1.735e-02  1.406e-02   1.234  0.21766    
## fourier(K = 26)C16_52 -1.207e-02  1.407e-02  -0.858  0.39117    
## fourier(K = 26)S16_52 -1.492e-03  1.406e-02  -0.106  0.91550    
## fourier(K = 26)C17_52  7.686e-03  1.406e-02   0.547  0.58466    
## fourier(K = 26)S17_52 -2.178e-02  1.407e-02  -1.548  0.12210    
## fourier(K = 26)C18_52  2.236e-03  1.406e-02   0.159  0.87369    
## fourier(K = 26)S18_52  1.775e-02  1.407e-02   1.262  0.20739    
## fourier(K = 26)C19_52 -2.301e-03  1.408e-02  -0.163  0.87021    
## fourier(K = 26)S19_52 -6.677e-05  1.405e-02  -0.005  0.99621    
## fourier(K = 26)C20_52 -2.977e-03  1.406e-02  -0.212  0.83240    
## fourier(K = 26)S20_52  6.098e-04  1.407e-02   0.043  0.96543    
## fourier(K = 26)C21_52  6.089e-03  1.405e-02   0.433  0.66483    
## fourier(K = 26)S21_52  3.927e-03  1.408e-02   0.279  0.78039    
## fourier(K = 26)C22_52 -8.038e-04  1.407e-02  -0.057  0.95447    
## fourier(K = 26)S22_52 -1.126e-02  1.405e-02  -0.801  0.42323    
## fourier(K = 26)C23_52 -2.159e-02  1.408e-02  -1.534  0.12551    
## fourier(K = 26)S23_52  1.192e-02  1.405e-02   0.848  0.39656    
## fourier(K = 26)C24_52  7.003e-04  1.405e-02   0.050  0.96025    
## fourier(K = 26)S24_52  1.837e-02  1.408e-02   1.305  0.19241    
## fourier(K = 26)C25_52  4.907e-03  1.406e-02   0.349  0.72717    
## fourier(K = 26)S25_52  7.445e-03  1.407e-02   0.529  0.59686    
## fourier(K = 26)C26_52 -3.084e-02  9.944e-03  -3.101  0.00201 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2679 on 673 degrees of freedom
## Multiple R-squared: 0.8572,  Adjusted R-squared: 0.8461
## F-statistic: 77.67 on 52 and 673 DF, p-value: < 2.22e-16

augment(fourier_gas26) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas26 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

gas_fit <- us_gas %>%
  model(K1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
        K2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
        K5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
        K10 = TSLM(Barrels ~ trend() + fourier(K = 10)),
        K20 = TSLM(Barrels ~ trend() + fourier(K = 20)),
        K26 = TSLM(Barrels ~ trend() + fourier(K = 26)))
glance(gas_fit) %>% select(.model, r_squared, adj_r_squared, AICc)

## # A tibble: 6 × 4
##   .model r_squared adj_r_squared   AICc
##   <chr>      <dbl>         <dbl>  <dbl>
## 1 K1         0.825         0.824 -1809.
## 2 K2         0.827         0.826 -1814.
## 3 K5         0.841         0.838 -1862.
## 4 K10        0.849         0.845 -1881.
## 5 K20        0.854         0.845 -1859.
## 6 K26        0.857         0.846 -1851.

gas_fit <- us_gas %>%
  model(K11 = TSLM(Barrels ~ trend() + fourier(K = 11)),
        K12 = TSLM(Barrels ~ trend() + fourier(K = 12)),
        K13 = TSLM(Barrels ~ trend() + fourier(K = 13)),
        K14 = TSLM(Barrels ~ trend() + fourier(K = 14)),
        K15 = TSLM(Barrels ~ trend() + fourier(K = 15)),
        K16 = TSLM(Barrels ~ trend() + fourier(K = 16)),
        K17 = TSLM(Barrels ~ trend() + fourier(K = 17)),
        K18 = TSLM(Barrels ~ trend() + fourier(K = 18)),
        K19 = TSLM(Barrels ~ trend() + fourier(K = 19)))
glance(gas_fit) %>% select(.model, r_squared, adj_r_squared, AICc)

## # A tibble: 9 × 4
##   .model r_squared adj_r_squared   AICc
##   <chr>      <dbl>         <dbl>  <dbl>
## 1 K11        0.851         0.846 -1885.
## 2 K12        0.852         0.847 -1884.
## 3 K13        0.852         0.846 -1881.
## 4 K14        0.852         0.846 -1879.
## 5 K15        0.853         0.846 -1876.
## 6 K16        0.853         0.846 -1872.
## 7 K17        0.853         0.846 -1871.
## 8 K18        0.854         0.846 -1868.
## 9 K19        0.854         0.846 -1864.

fourier_gas11 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 11)))
report(fourier_gas11)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.896208 -0.177835  0.003786  0.173355  1.014511 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.095e+00  1.994e-02 355.717  < 2e-16 ***
## trend()                2.799e-03  4.756e-05  58.844  < 2e-16 ***
## fourier(K = 11)C1_52  -1.245e-01  1.407e-02  -8.845  < 2e-16 ***
## fourier(K = 11)S1_52  -2.394e-01  1.408e-02 -17.007  < 2e-16 ***
## fourier(K = 11)C2_52   4.529e-02  1.405e-02   3.223  0.00133 ** 
## fourier(K = 11)S2_52   9.256e-03  1.408e-02   0.657  0.51125    
## fourier(K = 11)C3_52   9.632e-02  1.408e-02   6.842 1.70e-11 ***
## fourier(K = 11)S3_52   3.762e-05  1.405e-02   0.003  0.99787    
## fourier(K = 11)C4_52   2.899e-02  1.408e-02   2.060  0.03980 *  
## fourier(K = 11)S4_52   2.884e-02  1.406e-02   2.052  0.04054 *  
## fourier(K = 11)C5_52  -3.354e-02  1.405e-02  -2.387  0.01725 *  
## fourier(K = 11)S5_52   3.172e-02  1.408e-02   2.253  0.02458 *  
## fourier(K = 11)C6_52  -6.569e-02  1.406e-02  -4.671 3.59e-06 ***
## fourier(K = 11)S6_52   2.808e-02  1.407e-02   1.996  0.04628 *  
## fourier(K = 11)C7_52  -2.235e-02  1.408e-02  -1.588  0.11283    
## fourier(K = 11)S7_52   3.274e-02  1.405e-02   2.330  0.02010 *  
## fourier(K = 11)C8_52  -1.664e-02  1.406e-02  -1.183  0.23704    
## fourier(K = 11)S8_52  -1.428e-03  1.407e-02  -0.102  0.91916    
## fourier(K = 11)C9_52  -1.763e-02  1.406e-02  -1.254  0.21019    
## fourier(K = 11)S9_52  -5.731e-04  1.407e-02  -0.041  0.96753    
## fourier(K = 11)C10_52  1.272e-02  1.407e-02   0.904  0.36621    
## fourier(K = 11)S10_52 -2.359e-02  1.406e-02  -1.678  0.09375 .  
## fourier(K = 11)C11_52 -2.837e-02  1.407e-02  -2.016  0.04414 *  
## fourier(K = 11)S11_52  2.555e-02  1.406e-02   1.817  0.06962 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.268 on 702 degrees of freedom
## Multiple R-squared: 0.851,   Adjusted R-squared: 0.8461
## F-statistic: 174.3 on 23 and 702 DF, p-value: < 2.22e-16

augment(fourier_gas11) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas11 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas11 %>%
  gg_tsresiduals()

augment(fourier_gas11) %>%
  features(.resid, ljung_box, lag = 104, dof = 0)

## # A tibble: 1 × 3
##   .model                                    lb_stat lb_pvalue
##   <chr>                                       <dbl>     <dbl>
## 1 TSLM(Barrels ~ trend() + fourier(K = 11))    149.   0.00268

gas_plot <- us_gas %>%
  model(
    Mean = MEAN(Barrels),
    Naive = NAIVE(Barrels),
    Seasonal_naive = SNAIVE(Barrels),
    Drift = RW(Barrels ~ drift()))
gas_plot %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

fourier_gas11 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

The model is pretty accurate. An exception to the accuracy could be the massive drop right after 2006. The drop was still within the forecasts lower bounds so it would still be within the confidence interval.

Exercise 5

global_economy %>%
  filter(Country=="Afghanistan") %>%
  tsibble(key = Code, index = Year) %>%
  autoplot(Population, show.legend =  FALSE) +
  labs(title= "Afghanistan Population", y = "Population")

global_economy %>%
  filter(Country=="Afghanistan")%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5794518 -2582559   744761  2259222  6036280 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -829292529   49730866  -16.68   <2e-16 ***
## Year            425774      25008   17.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3188000 on 56 degrees of freedom
## Multiple R-squared: 0.8381,  Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16

global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year<1980)%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -146380.8 -110290.6    -451.2  105877.8  202881.3 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -470734527    8787062  -53.57   <2e-16 ***
## Year            244657       4462   54.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 115100 on 18 degrees of freedom
## Multiple R-squared: 0.994,   Adjusted R-squared: 0.9937
## F-statistic:  3007 on 1 and 18 DF, p-value: < 2.22e-16

global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year>1989)%>%
  model(TSLM(Population ~ Year)) %>%
  report()

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -619234 -212927    6598  234280  612277 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.670e+09  1.640e+07  -101.8   <2e-16 ***
## Year         8.451e+05  8.184e+03   103.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 349800 on 26 degrees of freedom
## Multiple R-squared: 0.9976,  Adjusted R-squared: 0.9975
## F-statistic: 1.066e+04 on 1 and 26 DF, p-value: < 2.22e-16

m1 <- global_economy %>%
  filter(Country=="Afghanistan")%>%
  model(TSLM(Population ~ Year)) 

m2 <- global_economy %>%
  filter(Country=="Afghanistan")%>%
  filter(Year>1989)%>%
  model(TSLM(Population ~ Year)) 

globalboi <- global_economy %>%
  filter(Country=="Afghanistan")

afgh.pfit <- globalboi %>%
  model(piecewise = TSLM(Population ~ trend(knots = c(1980, 1989))))
report(afgh.pfit)

## Series: Population 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -577590 -174198  -16784  187226  679947 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           8697573     131122   66.33   <2e-16 ***
## trend(knots = c(1980, 1989))trend      224372       9623   23.32   <2e-16 ***
## trend(knots = c(1980, 1989))trend_21  -456804      24498  -18.65   <2e-16 ***
## trend(knots = c(1980, 1989))trend_30  1082782      21418   50.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 300900 on 54 degrees of freedom
## Multiple R-squared: 0.9986,  Adjusted R-squared: 0.9985
## F-statistic: 1.293e+04 on 3 and 54 DF, p-value: < 2.22e-16

afgh.pfit %>% gg_tsresiduals()

forecast(m1, h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018   N(3e+07, 1.1e+13) 29919575.
## 2 Afghanistan TSLM(Population ~ Year)  2019   N(3e+07, 1.1e+13) 30345349.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.1e+07, 1.1e+13) 30771123.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.1e+07, 1.1e+13) 31196897.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.2e+07, 1.1e+13) 31622671.

forecast(m2, h=5)

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country     .model                   Year          Population     .mean
##   <fct>       <chr>                   <dbl>              <dist>     <dbl>
## 1 Afghanistan TSLM(Population ~ Year)  2018 N(3.6e+07, 1.4e+11) 35925602.
## 2 Afghanistan TSLM(Population ~ Year)  2019 N(3.7e+07, 1.4e+11) 36770747.
## 3 Afghanistan TSLM(Population ~ Year)  2020 N(3.8e+07, 1.4e+11) 37615892.
## 4 Afghanistan TSLM(Population ~ Year)  2021 N(3.8e+07, 1.5e+11) 38461037.
## 5 Afghanistan TSLM(Population ~ Year)  2022 N(3.9e+07, 1.5e+11) 39306182.

afghan_drift <- globalboi %>%
  model(RW(Population ~ drift())) %>%
  forecast(h = "5 years") %>%
  autoplot(globalboi) +
  labs(title = "Drift Forecast of Afghanistan Population", y = "Population in Millions")
afghan_drift

A drift works best, due to the way the data is. This is true because there isn’t seasonality. The drift fits well with this model. I feel like this is a pretty accurate forcast

Exercise 6

We assume that the model is a reasonable approximation to reality (linear).

No perfect Multicollinearity

The sum of errors is zero

Errors have a constant variance σ2 (homoscedasticity)

Error terms are not autocorrelated

Errors are unrelated to the predictor variables

Errors follow a normal distribution

A consistent estimator means that as the sample size gets larger, the estimate gets more and more accurate. An estimate is unbiased if the expected value is equal to the true value. This means that unbiased estimators aren’t effected by the size of the sample.
R squared explains the percentage of the variation in your error term that your model can explain. Adjusted R squared is identical to R squared except that adjusted R squared adjusts for degrees of freedom.