library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(readxl)
library(seasonal)
library(caTools)
# Alias for the vic_elec data set.
Elec <- vic_elec

# One row per day of January 2014: demand summed over the day and the
# maximum temperature observed that day.
Shock <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )
# Daily demand and maximum temperature, one facet row per variable.
# pivot_longer() replaces the superseded gather() for the wide-to-long reshape.
Shock %>%
  pivot_longer(
    cols = c(Demand, Temperature),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(rows = vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand")
# Same faceted view with geom_smooth()'s default smoother added.
# pivot_longer() replaces the superseded gather() for the wide-to-long reshape.
Shock %>%
  pivot_longer(
    cols = c(Demand, Temperature),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(rows = vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand") +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Same faceted view with a straight-line (lm) fit and no confidence band.
# pivot_longer() replaces the superseded gather() for the wide-to-long reshape.
Shock %>%
  pivot_longer(
    cols = c(Demand, Temperature),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Date, y = Value, colour = Variable)) +
  geom_point() +
  facet_grid(rows = vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Scatter of daily demand against daily maximum temperature with a linear fit.
ggplot(Shock, aes(x = Temperature, y = Demand)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Electricity Demand")
## `geom_smooth()` using formula 'y ~ x'
# Regress daily demand on daily maximum temperature, then show the
# residual diagnostics for that fit.
goodshit <- model(Shock, TSLM(Demand ~ Temperature))
gg_tsresiduals(goodshit)
# Fit the demand ~ temperature regression once and reuse it for both
# scenarios (it was previously refitted for each one).
temperature_fit <- Shock %>%
  model(TSLM(Demand ~ Temperature))

# Forecast the day after the end of `history` assuming a maximum temperature
# of `max_temp`, and return the forecast plotted on top of `history`.
plot_demand_at_temperature <- function(fit, history, max_temp) {
  scenario <- new_data(history, 1) %>%
    mutate(Temperature = max_temp)
  fit %>%
    forecast(new_data = scenario) %>%
    autoplot(history)
}

# Scenario: maximum temperature of 15 degrees.
gang15 <- plot_demand_at_temperature(temperature_fit, Shock, 15)
gang15

# Scenario: maximum temperature of 35 degrees.
gang35 <- plot_demand_at_temperature(temperature_fit, Shock, 35)
gang35
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## autoplot.Arima ggfortify
## autoplot.acf ggfortify
## autoplot.ar ggfortify
## autoplot.bats ggfortify
## autoplot.decomposed.ts ggfortify
## autoplot.ets ggfortify
## autoplot.forecast ggfortify
## autoplot.stl ggfortify
## autoplot.ts ggfortify
## fitted.ar ggfortify
## fortify.ts ggfortify
## residuals.ar ggfortify
##
## Attaching package: 'forecast'
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
# Ordinary lm fit of the same regression, used to get prediction intervals
# for the 15 and 35 degree scenarios via forecast().
uhoh <- lm(Demand ~ Temperature, data = Shock)
scenario_temperatures <- data.frame(Temperature = c(15, 35))
forecast(uhoh, newdata = scenario_temperatures)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 151398.35 151398.4 117127.2 185669.5 97951.22 204845.5
## 274484.25 274484.2 241333.0 307635.5 222783.69 326184.8
# Remove the forecast package from the search path again; attaching it
# masked accuracy() and forecast() from fabletools.
detach(package:forecast, unload = TRUE)

# Demand against temperature for every observation in vic_elec.
plot(Demand ~ Temperature, data = vic_elec)
As the temperature increases, the demand for electricity also increases. This is due to a higher demand for air conditioning, fans, and other methods of temp control. All of these things consume a lot of electricity.
Yes, the model is adequate. There are two outliers near Jan 27. These can also be seen on the histogram (on the far left): as the left tail spikes, it skews the data to the right a bit.
The 35 degree forecast works well; however, the 15 degree forecast is bad. It is very low, even compared to the lowest temperatures.
This shows that when temps are very low, people demand electricity. When temps are very high people demand electricity even more.
# Winning times over the years, one panel per distance/sex combination.
olympic_running %>%
  autoplot() +
  facet_wrap(~ Length + Sex, scales = "free") +
  labs(title = "Running", x = "Year", y = "Time") +
  theme(legend.position = "none")
## Plot variable not specified, automatically selected `.vars = Time`
# Same panels with a straight-line fit drawn through each event.
olympic_running %>%
  autoplot() +
  facet_wrap(~ Length + Sex, scales = "free") +
  labs(title = "Running", x = "Year", y = "Time") +
  theme(legend.position = "none") +
  geom_smooth(method = "lm", se = FALSE)
## Plot variable not specified, automatically selected `.vars = Time`
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (stat_smooth).
# Pooled linear model with Year as the response and every other column of
# olympic_running as a predictor.
# NOTE(review): Year is the response here, so the Time coefficient is in
# years per second, not seconds per year -- confirm this is the intended model.
olympic_lm <- lm(formula = Year ~ ., data = olympic_running)
olympic_lm
##
## Call:
## lm(formula = Year ~ ., data = olympic_running)
##
## Coefficients:
## (Intercept) Length Sexwomen Time
## 1947.93654 0.05872 34.13151 -0.32515
library(forecast)
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
# Residual diagnostics for the pooled linear model; the printed result is a
# Breusch-Godfrey test for serial correlation.
checkresiduals(olympic_lm)
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals
## LM test = 179.57, df = 10, p-value < 2.2e-16
# Detach forecast so that forecast() resolves to the fabletools version again
# (attaching the forecast package masked it).
detach(package:forecast, unload = TRUE)

# Fit a linear trend (Time ~ Year) to a single event, forecast the 2020
# winning time and return the forecast plotted over that event's history.
#   event_sex    value of the Sex column to keep ("men" or "women")
#   event_length value of the Length column to keep (race distance)
#   sex_label    text used for the sex in the plot title ("Men" or "Women")
plot_olympic_forecast <- function(event_sex, event_length, sex_label) {
  event_results <- olympic_running %>%
    filter(Sex == event_sex, Length == event_length)
  event_title <- paste0(
    "Forecasted Winning Time (", sex_label, "'s ", event_length, ")"
  )
  event_results %>%
    model(TSLM(Time ~ Year)) %>%
    forecast(new_data(event_results, 1) %>% mutate(Year = 2020)) %>%
    autoplot(event_results) +
    labs(title = event_title)
}

# One plot per event: men's then women's for each distance. print() is
# required because ggplot objects are not auto-printed inside a loop.
for (event_length in c(100L, 200L, 400L, 800L, 1500L, 5000L, 10000L)) {
  print(plot_olympic_forecast("men", event_length, "Men"))
  print(plot_olympic_forecast("women", event_length, "Women"))
}
For all race lengths, the winning times trend downward. This is likely due to people becoming better and faster at running over time.
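The Time coefficient is -0.32515. Note that the regression above has Year as the response, so this is a change of about -0.33 years per extra second of winning time (pooled over all events), not a decrease of 0.32515 seconds per year.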
The assumption made is that people are going to continue to get better at running at the same rate, which isn’t true. There will be years where times go up (for example, because a great athlete retires).
# Base-graphics plot of the souvenirs table.
plot(souvenirs)

# Time plot of the series (autoplot picks the plotted variable itself).
souvenirs %>%
  autoplot() +
  labs(title = "Time Series of Australian Souvenirs")
## Plot variable not specified, automatically selected `.vars = Sales`
# Histograms of raw and log sales.
souvnum <- souvenirs
souvsales <- as.numeric(souvenirs$Sales)
hist(souvsales)
souvlog <- souvnum
souvlogSales <- log(souvnum$Sales)
hist(souvlogSales)

# Monthly ts version of the sales and its log.
# NOTE(review): end = c(1993, 6) cuts the series at June 1993 -- confirm the
# truncation is intended if souvenirs holds later observations.
souv <- ts(
  souvenirs$Sales,
  start = c(1987, 1),
  end = c(1993, 6),
  frequency = 12
)
souvlog2 <- log(souv)

# Surf-festival dummy: 1 in every March (position 3 of each 12-month cycle,
# since the series starts in January), 0 otherwise.
dummysurffest <- rep(0, length(souv))
dummysurffest[seq_along(dummysurffest) %% 12 == 3] <- 1
# March 1987 is switched back off -- presumably the festival did not run
# that year; confirm against the data description.
dummysurffest[3] <- 0
dummysurffest <- ts(dummysurffest, frequency = 12, start = c(1987, 1))

# Response and dummy side by side for tslm().
Bothofem <- data.frame(souvlog2, dummysurffest)
library(forecast)
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
# Log sales regressed on a linear trend, monthly seasonal dummies and the
# surf-festival dummy.
fitty <- tslm(souvlog2 ~ trend + season + dummysurffest, data = Bothofem)

# Festival dummy for the 12 months after the end of the fitted series.
# The series starts in January, so position %% 12 == 3 marks March. The
# forecast window starts in July 1993, which puts March 1994 on row 9
# (row 3, which was flagged before, is September 1993).
# NOTE: the printed forecast below predates this fix; regenerate it.
forecast_positions <- length(souvlog2) + seq_len(12)
forecastmaybe <- data.frame(
  dummysurffest = as.numeric(forecast_positions %% 12 == 3)
)
forecast(fitty, newdata = forecastmaybe)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 1993 9.883136 9.637329 10.128943 9.503919 10.262353
## Aug 1993 9.867772 9.621965 10.113579 9.488555 10.246989
## Sep 1993 10.531942 10.191069 10.872815 10.006062 11.057822
## Oct 1993 10.092605 9.846798 10.338412 9.713388 10.471822
## Nov 1993 10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993 11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994 9.430933 9.186093 9.675774 9.053207 9.808659
## Feb 1994 9.704370 9.459530 9.949210 9.326644 10.082096
## Mar 1994 9.695742 9.365756 10.025727 9.186658 10.204825
## Apr 1994 9.881046 9.636206 10.125887 9.503320 10.258772
## May 1994 9.928500 9.683659 10.173340 9.550774 10.306226
## Jun 1994 9.989861 9.745020 10.234701 9.612135 10.367587
# Residuals of the tslm fit plotted over time.
plot(fitty$residuals)
# Residuals grouped by position in the seasonal cycle (month of the year).
boxplot(residuals(fitty)~cycle(residuals(fitty)))
# Estimated coefficients: intercept, trend, seasonal dummies, festival dummy.
fitty$coefficients
## (Intercept) trend season2 season3 season4
## 7.6662400 0.0207611 0.2526756 0.2232860 0.3878297
## season5 season6 season7 season8 season9
## 0.4145219 0.4551219 0.5767690 0.5406444 0.6296713
## season10 season11 season12 dummysurffest
## 0.7239552 1.1957774 1.9473841 0.5543818
# Residual diagnostics for the tslm fit; the printed result is a
# Breusch-Godfrey test for serial correlation.
checkresiduals(fitty)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 31.535, df = 17, p-value = 0.01718
# Festival dummy for the 36 months after the end of the fitted series.
# The series starts in January, so position %% 12 == 3 marks March; the
# dummy was all zeros before, which ignored the festival in March of
# 1994, 1995 and 1996.
# NOTE: the printed forecast below predates this fix; regenerate it.
festival_positions <- length(souvlog2) + seq_len(36)
forecastcoolio <- data.frame(
  dummysurffest = as.numeric(festival_positions %% 12 == 3)
)
predictions <- forecast(fitty, newdata = forecastcoolio)
predictions
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 1993 9.883136 9.637329 10.128943 9.503919 10.262353
## Aug 1993 9.867772 9.621965 10.113579 9.488555 10.246989
## Sep 1993 9.977560 9.731753 10.223367 9.598343 10.356777
## Oct 1993 10.092605 9.846798 10.338412 9.713388 10.471822
## Nov 1993 10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993 11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994 9.430933 9.186093 9.675774 9.053207 9.808659
## Feb 1994 9.704370 9.459530 9.949210 9.326644 10.082096
## Mar 1994 9.695742 9.365756 10.025727 9.186658 10.204825
## Apr 1994 9.881046 9.636206 10.125887 9.503320 10.258772
## May 1994 9.928500 9.683659 10.173340 9.550774 10.306226
## Jun 1994 9.989861 9.745020 10.234701 9.612135 10.367587
## Jul 1994 10.132269 9.883394 10.381144 9.748319 10.516219
## Aug 1994 10.116905 9.868031 10.365780 9.732955 10.500856
## Sep 1994 10.226693 9.977819 10.475568 9.842743 10.610644
## Oct 1994 10.341738 10.092864 10.590613 9.957788 10.725689
## Nov 1994 10.834322 10.585447 11.083197 10.450372 11.218272
## Dec 1994 11.606690 11.357815 11.855564 11.222739 11.990640
## Jan 1995 9.680067 9.431764 9.928369 9.296999 10.063134
## Feb 1995 9.953503 9.705201 10.201806 9.570436 10.336570
## Mar 1995 9.944875 9.610605 10.279144 9.429182 10.460567
## Apr 1995 10.130180 9.881877 10.378482 9.747112 10.513247
## May 1995 10.177633 9.929330 10.425935 9.794566 10.560700
## Jun 1995 10.238994 9.990691 10.487296 9.855927 10.622061
## Jul 1995 10.381402 10.128745 10.634059 9.991617 10.771188
## Aug 1995 10.366039 10.113381 10.618696 9.976253 10.755824
## Sep 1995 10.475827 10.223169 10.728484 10.086041 10.865612
## Oct 1995 10.590872 10.338214 10.843529 10.201086 10.980657
## Nov 1995 11.083455 10.830798 11.336112 10.693669 11.473240
## Dec 1995 11.855823 11.603165 12.108480 11.466037 12.245608
## Jan 1996 9.929200 9.676730 10.181669 9.539704 10.318696
## Feb 1996 10.202636 9.950167 10.455106 9.813141 10.592132
## Mar 1996 10.194008 9.854949 10.533067 9.670926 10.717090
## Apr 1996 10.379313 10.126843 10.631782 9.989817 10.768809
## May 1996 10.426766 10.174296 10.679236 10.037270 10.816262
## Jun 1996 10.488127 10.235658 10.740597 10.098631 10.877623
# Plot the 36-month forecast of log sales produced above.
autoplot(predictions)
There is a huge spike in December of each year. This is probably because it is summer at that time, so more people are visiting. There is also a slight dip during 1991, which is likely due to a small recession. Overall, there is an upward trend in the data.
The very large spikes in December skew the data to not be normally distributed. When you take the log, it transforms the data to fit it into a normal distribution.
The residuals show little or no remaining trend or seasonality, which suggests that the residuals are white noise.
These show the correlations with the variables. Seasons 11 and 12 have the highest impact on souvenir sales.
The residuals have a relatively normal distribution and there is a significant p-value.
The Ljung-Box test tells you whether your residuals are statistically different from white noise. The lower the p-value, the less the residuals resemble white noise.
I feel like a Box-Cox transformation could help fix some of the heteroscedasticity in the model and help stabilise the variance of the residuals.
# Weekly US gasoline supply up to the end of 2004.
# year() returns a number, so it is compared with the number 2005 rather
# than the string "2005" (which forced a character comparison).
us_gas <- us_gasoline %>%
  filter(year(Week) < 2005)

# Harmonic regression: linear trend plus one pair of Fourier terms.
fourier_gas1 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 1)))
report(fourier_gas1)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.969489 -0.197166 -0.002252 0.200869 0.975792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.092e+00 2.131e-02 332.782 < 2e-16 ***
## trend() 2.807e-03 5.081e-05 55.237 < 2e-16 ***
## fourier(K = 1)C1_52 -1.238e-01 1.505e-02 -8.226 9.01e-16 ***
## fourier(K = 1)S1_52 -2.383e-01 1.505e-02 -15.832 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2865 on 722 degrees of freedom
## Multiple R-squared: 0.8248, Adjusted R-squared: 0.8241
## F-statistic: 1133 on 3 and 722 DF, p-value: < 2.22e-16
# Observed series with the K = 1 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas1) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# Detach forecast so that forecast() below resolves to the fabletools version
# (attaching the forecast package masked it).
detach(package:forecast, unload = TRUE)

# 52-week-ahead forecast from the K = 1 model.
fourier_gas1 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Harmonic regression with two pairs of Fourier terms.
fourier_gas2 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 2)))
report(fourier_gas2)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9375162 -0.1897569 -0.0006692 0.2058275 1.0016928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+00 2.121e-02 334.493 < 2e-16 ***
## trend() 2.802e-03 5.057e-05 55.420 < 2e-16 ***
## fourier(K = 2)C1_52 -1.237e-01 1.497e-02 -8.265 6.71e-16 ***
## fourier(K = 2)S1_52 -2.383e-01 1.497e-02 -15.917 < 2e-16 ***
## fourier(K = 2)C2_52 4.493e-02 1.495e-02 3.006 0.00274 **
## fourier(K = 2)S2_52 1.054e-02 1.498e-02 0.704 0.48193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 720 degrees of freedom
## Multiple R-squared: 0.8271, Adjusted R-squared: 0.8259
## F-statistic: 688.8 on 5 and 720 DF, p-value: < 2.22e-16
# Observed series with the K = 2 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas2) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 2 model.
fourier_gas2 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Harmonic regression with five pairs of Fourier terms.
fourier_gas5 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 5)))
report(fourier_gas5)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8568361 -0.1759670 -0.0002609 0.1916623 0.9380241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 2.043e-02 347.198 < 2e-16 ***
## trend() 2.798e-03 4.873e-05 57.428 < 2e-16 ***
## fourier(K = 5)C1_52 -1.242e-01 1.442e-02 -8.610 < 2e-16 ***
## fourier(K = 5)S1_52 -2.390e-01 1.442e-02 -16.570 < 2e-16 ***
## fourier(K = 5)C2_52 4.517e-02 1.440e-02 3.137 0.00178 **
## fourier(K = 5)S2_52 9.760e-03 1.443e-02 0.676 0.49898
## fourier(K = 5)C3_52 9.586e-02 1.442e-02 6.646 6e-11 ***
## fourier(K = 5)S3_52 2.543e-04 1.440e-02 0.018 0.98592
## fourier(K = 5)C4_52 2.854e-02 1.442e-02 1.979 0.04821 *
## fourier(K = 5)S4_52 2.861e-02 1.440e-02 1.987 0.04733 *
## fourier(K = 5)C5_52 -3.364e-02 1.440e-02 -2.336 0.01974 *
## fourier(K = 5)S5_52 3.123e-02 1.443e-02 2.165 0.03073 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2746 on 714 degrees of freedom
## Multiple R-squared: 0.8409, Adjusted R-squared: 0.8384
## F-statistic: 343 on 11 and 714 DF, p-value: < 2.22e-16
# Observed series with the K = 5 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas5) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 5 model.
fourier_gas5 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Harmonic regression with ten pairs of Fourier terms.
fourier_gas10 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 10)))
report(fourier_gas10)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8625657 -0.1755251 0.0003646 0.1817594 0.9806206
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+00 2.002e-02 354.363 < 2e-16 ***
## trend() 2.799e-03 4.774e-05 58.625 < 2e-16 ***
## fourier(K = 10)C1_52 -1.245e-01 1.413e-02 -8.814 < 2e-16 ***
## fourier(K = 10)S1_52 -2.395e-01 1.413e-02 -16.945 < 2e-16 ***
## fourier(K = 10)C2_52 4.529e-02 1.411e-02 3.210 0.00139 **
## fourier(K = 10)S2_52 9.203e-03 1.414e-02 0.651 0.51524
## fourier(K = 10)C3_52 9.636e-02 1.413e-02 6.819 1.98e-11 ***
## fourier(K = 10)S3_52 -4.035e-06 1.411e-02 0.000 0.99977
## fourier(K = 10)C4_52 2.905e-02 1.413e-02 2.056 0.04015 *
## fourier(K = 10)S4_52 2.884e-02 1.411e-02 2.044 0.04134 *
## fourier(K = 10)C5_52 -3.349e-02 1.410e-02 -2.375 0.01783 *
## fourier(K = 10)S5_52 3.176e-02 1.413e-02 2.247 0.02494 *
## fourier(K = 10)C6_52 -6.569e-02 1.412e-02 -4.653 3.91e-06 ***
## fourier(K = 10)S6_52 2.815e-02 1.412e-02 1.993 0.04660 *
## fourier(K = 10)C7_52 -2.240e-02 1.413e-02 -1.585 0.11340
## fourier(K = 10)S7_52 3.279e-02 1.411e-02 2.324 0.02038 *
## fourier(K = 10)C8_52 -1.671e-02 1.411e-02 -1.184 0.23676
## fourier(K = 10)S8_52 -1.432e-03 1.412e-02 -0.101 0.91926
## fourier(K = 10)C9_52 -1.768e-02 1.411e-02 -1.253 0.21066
## fourier(K = 10)S9_52 -6.335e-04 1.413e-02 -0.045 0.96424
## fourier(K = 10)C10_52 1.274e-02 1.412e-02 0.902 0.36751
## fourier(K = 10)S10_52 -2.368e-02 1.411e-02 -1.678 0.09387 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.269 on 704 degrees of freedom
## Multiple R-squared: 0.8494, Adjusted R-squared: 0.8449
## F-statistic: 189.1 on 21 and 704 DF, p-value: < 2.22e-16
# Observed series with the K = 10 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas10) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 10 model.
fourier_gas10 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Harmonic regression with twenty pairs of Fourier terms.
fourier_gas20 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 20)))
report(fourier_gas20)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8455538 -0.1703796 0.0009278 0.1724455 0.9995200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 2.001e-02 354.550 < 2e-16 ***
## trend() 2.798e-03 4.772e-05 58.649 < 2e-16 ***
## fourier(K = 20)C1_52 -1.245e-01 1.412e-02 -8.814 < 2e-16 ***
## fourier(K = 20)S1_52 -2.394e-01 1.413e-02 -16.946 < 2e-16 ***
## fourier(K = 20)C2_52 4.525e-02 1.410e-02 3.209 0.00139 **
## fourier(K = 20)S2_52 9.317e-03 1.413e-02 0.659 0.50988
## fourier(K = 20)C3_52 9.624e-02 1.413e-02 6.814 2.09e-11 ***
## fourier(K = 20)S3_52 3.202e-05 1.410e-02 0.002 0.99819
## fourier(K = 20)C4_52 2.895e-02 1.412e-02 2.050 0.04072 *
## fourier(K = 20)S4_52 2.877e-02 1.410e-02 2.040 0.04175 *
## fourier(K = 20)C5_52 -3.349e-02 1.410e-02 -2.376 0.01779 *
## fourier(K = 20)S5_52 3.165e-02 1.413e-02 2.240 0.02541 *
## fourier(K = 20)C6_52 -6.559e-02 1.411e-02 -4.649 4.01e-06 ***
## fourier(K = 20)S6_52 2.808e-02 1.411e-02 1.990 0.04701 *
## fourier(K = 20)C7_52 -2.229e-02 1.413e-02 -1.578 0.11498
## fourier(K = 20)S7_52 3.283e-02 1.410e-02 2.329 0.02018 *
## fourier(K = 20)C8_52 -1.668e-02 1.411e-02 -1.183 0.23735
## fourier(K = 20)S8_52 -1.323e-03 1.412e-02 -0.094 0.92536
## fourier(K = 20)C9_52 -1.775e-02 1.410e-02 -1.259 0.20862
## fourier(K = 20)S9_52 -5.484e-04 1.412e-02 -0.039 0.96903
## fourier(K = 20)C10_52 1.263e-02 1.412e-02 0.894 0.37140
## fourier(K = 20)S10_52 -2.368e-02 1.411e-02 -1.679 0.09360 .
## fourier(K = 20)C11_52 -2.835e-02 1.411e-02 -2.008 0.04500 *
## fourier(K = 20)S11_52 2.542e-02 1.411e-02 1.801 0.07208 .
## fourier(K = 20)C12_52 -1.166e-02 1.411e-02 -0.827 0.40869
## fourier(K = 20)S12_52 -2.505e-02 1.411e-02 -1.775 0.07633 .
## fourier(K = 20)C13_52 6.269e-03 1.411e-02 0.444 0.65703
## fourier(K = 20)S13_52 1.421e-02 1.411e-02 1.007 0.31442
## fourier(K = 20)C14_52 -3.088e-03 1.411e-02 -0.219 0.82685
## fourier(K = 20)S14_52 1.816e-02 1.411e-02 1.287 0.19865
## fourier(K = 20)C15_52 -1.403e-03 1.411e-02 -0.099 0.92086
## fourier(K = 20)S15_52 1.743e-02 1.411e-02 1.235 0.21708
## fourier(K = 20)C16_52 -1.224e-02 1.412e-02 -0.867 0.38624
## fourier(K = 20)S16_52 -1.567e-03 1.411e-02 -0.111 0.91160
## fourier(K = 20)C17_52 7.639e-03 1.410e-02 0.542 0.58827
## fourier(K = 20)S17_52 -2.197e-02 1.412e-02 -1.556 0.12021
## fourier(K = 20)C18_52 2.355e-03 1.411e-02 0.167 0.86749
## fourier(K = 20)S18_52 1.760e-02 1.412e-02 1.247 0.21299
## fourier(K = 20)C19_52 -2.098e-03 1.412e-02 -0.149 0.88197
## fourier(K = 20)S19_52 -7.173e-05 1.410e-02 -0.005 0.99594
## fourier(K = 20)C20_52 -2.844e-03 1.411e-02 -0.202 0.84034
## fourier(K = 20)S20_52 7.698e-04 1.411e-02 0.055 0.95652
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2688 on 684 degrees of freedom
## Multiple R-squared: 0.8538, Adjusted R-squared: 0.8451
## F-statistic: 97.46 on 41 and 684 DF, p-value: < 2.22e-16
# Observed series with the K = 20 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas20) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 20 model.
fourier_gas20 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Harmonic regression with K = 26 Fourier terms.
fourier_gas26 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 26)))
report(fourier_gas26)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.834099 -0.172836 0.001292 0.167365 1.034800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.756 < 2e-16 ***
## trend() 2.799e-03 4.755e-05 58.858 < 2e-16 ***
## fourier(K = 26)C1_52 -1.244e-01 1.407e-02 -8.843 < 2e-16 ***
## fourier(K = 26)S1_52 -2.394e-01 1.408e-02 -17.004 < 2e-16 ***
## fourier(K = 26)C2_52 4.527e-02 1.405e-02 3.222 0.00134 **
## fourier(K = 26)S2_52 9.340e-03 1.408e-02 0.663 0.50740
## fourier(K = 26)C3_52 9.624e-02 1.408e-02 6.837 1.81e-11 ***
## fourier(K = 26)S3_52 7.369e-05 1.405e-02 0.005 0.99582
## fourier(K = 26)C4_52 2.892e-02 1.407e-02 2.055 0.04031 *
## fourier(K = 26)S4_52 2.880e-02 1.405e-02 2.050 0.04079 *
## fourier(K = 26)C5_52 -3.356e-02 1.405e-02 -2.389 0.01719 *
## fourier(K = 26)S5_52 3.164e-02 1.408e-02 2.247 0.02494 *
## fourier(K = 26)C6_52 -6.564e-02 1.406e-02 -4.668 3.67e-06 ***
## fourier(K = 26)S6_52 2.802e-02 1.407e-02 1.992 0.04676 *
## fourier(K = 26)C7_52 -2.227e-02 1.408e-02 -1.582 0.11409
## fourier(K = 26)S7_52 3.274e-02 1.405e-02 2.330 0.02009 *
## fourier(K = 26)C8_52 -1.659e-02 1.406e-02 -1.180 0.23839
## fourier(K = 26)S8_52 -1.368e-03 1.407e-02 -0.097 0.92254
## fourier(K = 26)C9_52 -1.765e-02 1.406e-02 -1.255 0.20977
## fourier(K = 26)S9_52 -4.998e-04 1.407e-02 -0.036 0.97168
## fourier(K = 26)C10_52 1.266e-02 1.407e-02 0.900 0.36859
## fourier(K = 26)S10_52 -2.356e-02 1.406e-02 -1.676 0.09419 .
## fourier(K = 26)C11_52 -2.843e-02 1.407e-02 -2.021 0.04366 *
## fourier(K = 26)S11_52 2.553e-02 1.406e-02 1.815 0.06991 .
## fourier(K = 26)C12_52 -1.181e-02 1.406e-02 -0.840 0.40118
## fourier(K = 26)S12_52 -2.505e-02 1.407e-02 -1.781 0.07539 .
## fourier(K = 26)C13_52 6.166e-03 1.406e-02 0.438 0.66119
## fourier(K = 26)S13_52 1.409e-02 1.406e-02 1.002 0.31687
## fourier(K = 26)C14_52 -3.056e-03 1.406e-02 -0.217 0.82802
## fourier(K = 26)S14_52 1.800e-02 1.407e-02 1.279 0.20120
## fourier(K = 26)C15_52 -1.247e-03 1.407e-02 -0.089 0.92937
## fourier(K = 26)S15_52 1.735e-02 1.406e-02 1.234 0.21766
## fourier(K = 26)C16_52 -1.207e-02 1.407e-02 -0.858 0.39117
## fourier(K = 26)S16_52 -1.492e-03 1.406e-02 -0.106 0.91550
## fourier(K = 26)C17_52 7.686e-03 1.406e-02 0.547 0.58466
## fourier(K = 26)S17_52 -2.178e-02 1.407e-02 -1.548 0.12210
## fourier(K = 26)C18_52 2.236e-03 1.406e-02 0.159 0.87369
## fourier(K = 26)S18_52 1.775e-02 1.407e-02 1.262 0.20739
## fourier(K = 26)C19_52 -2.301e-03 1.408e-02 -0.163 0.87021
## fourier(K = 26)S19_52 -6.677e-05 1.405e-02 -0.005 0.99621
## fourier(K = 26)C20_52 -2.977e-03 1.406e-02 -0.212 0.83240
## fourier(K = 26)S20_52 6.098e-04 1.407e-02 0.043 0.96543
## fourier(K = 26)C21_52 6.089e-03 1.405e-02 0.433 0.66483
## fourier(K = 26)S21_52 3.927e-03 1.408e-02 0.279 0.78039
## fourier(K = 26)C22_52 -8.038e-04 1.407e-02 -0.057 0.95447
## fourier(K = 26)S22_52 -1.126e-02 1.405e-02 -0.801 0.42323
## fourier(K = 26)C23_52 -2.159e-02 1.408e-02 -1.534 0.12551
## fourier(K = 26)S23_52 1.192e-02 1.405e-02 0.848 0.39656
## fourier(K = 26)C24_52 7.003e-04 1.405e-02 0.050 0.96025
## fourier(K = 26)S24_52 1.837e-02 1.408e-02 1.305 0.19241
## fourier(K = 26)C25_52 4.907e-03 1.406e-02 0.349 0.72717
## fourier(K = 26)S25_52 7.445e-03 1.407e-02 0.529 0.59686
## fourier(K = 26)C26_52 -3.084e-02 9.944e-03 -3.101 0.00201 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2679 on 673 degrees of freedom
## Multiple R-squared: 0.8572, Adjusted R-squared: 0.8461
## F-statistic: 77.67 on 52 and 673 DF, p-value: < 2.22e-16
# Observed series with the K = 26 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas26) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 26 model.
fourier_gas26 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Fit the six candidate values of K side by side and compare their
# R-squared, adjusted R-squared and AICc.
gas_fit <- us_gas %>%
  model(
    K1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
    K2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
    K5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
    K10 = TSLM(Barrels ~ trend() + fourier(K = 10)),
    K20 = TSLM(Barrels ~ trend() + fourier(K = 20)),
    K26 = TSLM(Barrels ~ trend() + fourier(K = 26))
  )
glance(gas_fit) %>%
  select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 6 × 4
## .model r_squared adj_r_squared AICc
## <chr> <dbl> <dbl> <dbl>
## 1 K1 0.825 0.824 -1809.
## 2 K2 0.827 0.826 -1814.
## 3 K5 0.841 0.838 -1862.
## 4 K10 0.849 0.845 -1881.
## 5 K20 0.854 0.845 -1859.
## 6 K26 0.857 0.846 -1851.
# Second pass of the search: every K from 11 to 19, compared on the same
# three fit statistics. This overwrites the earlier gas_fit.
gas_fit <- model(
  us_gas,
  K11 = TSLM(Barrels ~ trend() + fourier(K = 11)),
  K12 = TSLM(Barrels ~ trend() + fourier(K = 12)),
  K13 = TSLM(Barrels ~ trend() + fourier(K = 13)),
  K14 = TSLM(Barrels ~ trend() + fourier(K = 14)),
  K15 = TSLM(Barrels ~ trend() + fourier(K = 15)),
  K16 = TSLM(Barrels ~ trend() + fourier(K = 16)),
  K17 = TSLM(Barrels ~ trend() + fourier(K = 17)),
  K18 = TSLM(Barrels ~ trend() + fourier(K = 18)),
  K19 = TSLM(Barrels ~ trend() + fourier(K = 19))
)
gas_fit %>%
  glance() %>%
  select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 9 × 4
## .model r_squared adj_r_squared AICc
## <chr> <dbl> <dbl> <dbl>
## 1 K11 0.851 0.846 -1885.
## 2 K12 0.852 0.847 -1884.
## 3 K13 0.852 0.846 -1881.
## 4 K14 0.852 0.846 -1879.
## 5 K15 0.853 0.846 -1876.
## 6 K16 0.853 0.846 -1872.
## 7 K17 0.853 0.846 -1871.
## 8 K18 0.854 0.846 -1868.
## 9 K19 0.854 0.846 -1864.
# Harmonic regression with eleven pairs of Fourier terms, the K with the
# lowest AICc in the comparison tables printed above.
fourier_gas11 <- model(us_gas, TSLM(Barrels ~ trend() + fourier(K = 11)))
fourier_gas11 %>%
  report()
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.896208 -0.177835 0.003786 0.173355 1.014511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.717 < 2e-16 ***
## trend() 2.799e-03 4.756e-05 58.844 < 2e-16 ***
## fourier(K = 11)C1_52 -1.245e-01 1.407e-02 -8.845 < 2e-16 ***
## fourier(K = 11)S1_52 -2.394e-01 1.408e-02 -17.007 < 2e-16 ***
## fourier(K = 11)C2_52 4.529e-02 1.405e-02 3.223 0.00133 **
## fourier(K = 11)S2_52 9.256e-03 1.408e-02 0.657 0.51125
## fourier(K = 11)C3_52 9.632e-02 1.408e-02 6.842 1.70e-11 ***
## fourier(K = 11)S3_52 3.762e-05 1.405e-02 0.003 0.99787
## fourier(K = 11)C4_52 2.899e-02 1.408e-02 2.060 0.03980 *
## fourier(K = 11)S4_52 2.884e-02 1.406e-02 2.052 0.04054 *
## fourier(K = 11)C5_52 -3.354e-02 1.405e-02 -2.387 0.01725 *
## fourier(K = 11)S5_52 3.172e-02 1.408e-02 2.253 0.02458 *
## fourier(K = 11)C6_52 -6.569e-02 1.406e-02 -4.671 3.59e-06 ***
## fourier(K = 11)S6_52 2.808e-02 1.407e-02 1.996 0.04628 *
## fourier(K = 11)C7_52 -2.235e-02 1.408e-02 -1.588 0.11283
## fourier(K = 11)S7_52 3.274e-02 1.405e-02 2.330 0.02010 *
## fourier(K = 11)C8_52 -1.664e-02 1.406e-02 -1.183 0.23704
## fourier(K = 11)S8_52 -1.428e-03 1.407e-02 -0.102 0.91916
## fourier(K = 11)C9_52 -1.763e-02 1.406e-02 -1.254 0.21019
## fourier(K = 11)S9_52 -5.731e-04 1.407e-02 -0.041 0.96753
## fourier(K = 11)C10_52 1.272e-02 1.407e-02 0.904 0.36621
## fourier(K = 11)S10_52 -2.359e-02 1.406e-02 -1.678 0.09375 .
## fourier(K = 11)C11_52 -2.837e-02 1.407e-02 -2.016 0.04414 *
## fourier(K = 11)S11_52 2.555e-02 1.406e-02 1.817 0.06962 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.268 on 702 degrees of freedom
## Multiple R-squared: 0.851, Adjusted R-squared: 0.8461
## F-statistic: 174.3 on 23 and 702 DF, p-value: < 2.22e-16
# Observed series with the K = 11 fitted values overlaid.
# us_gasoline reports Barrels in million barrels per day, so the axis label
# uses that unit.
augment(fourier_gas11) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Million barrels per day", title = "US Gas Harmonic Regression") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

# 52-week-ahead forecast from the K = 11 model.
fourier_gas11 %>%
  forecast(h = 52) %>%
  autoplot(us_gas)

# Residual diagnostics plots for the K = 11 model.
fourier_gas11 %>%
  gg_tsresiduals()

# Ljung-Box test on the residuals using 104 lags.
augment(fourier_gas11) %>%
  features(.resid, ljung_box, lag = 104, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 TSLM(Barrels ~ trend() + fourier(K = 11)) 149. 0.00268
# Fit four simple benchmark methods to the same series so their forecasts can
# be compared with the harmonic regression.
gas_plot <- model(
  us_gas,
  Mean = MEAN(Barrels),
  Naive = NAIVE(Barrels),
  Seasonal_naive = SNAIVE(Barrels),
  Drift = RW(Barrels ~ drift())
)
# 52-step-ahead forecasts from every benchmark, drawn over the observed data.
autoplot(
  forecast(gas_plot, h = 52),
  us_gas
)
# Redraw the 52-week harmonic-regression forecast so it can be read next to
# the benchmark forecasts plotted just before it.
autoplot(
  forecast(fourier_gas11, h = 52),
  us_gas
)
The model is fairly accurate. One exception is the sharp drop right after 2006. That drop still stayed above the forecast's lower bound, so it falls within the prediction interval.
# Plot Afghanistan's annual population.
# global_economy is already a tsibble (indexed by Year), so the filtered result
# can be passed to autoplot() directly; rebuilding it with tsibble() only
# re-keyed a single series and did not change the plot.
global_economy %>%
  filter(Country == "Afghanistan") %>%
  autoplot(Population, show.legend = FALSE) +
  labs(title = "Afghanistan Population", y = "Population")
# Linear trend in Year fitted to the whole Afghanistan sample; report() prints
# the coefficient table and fit statistics.
report(
  model(
    filter(global_economy, Country == "Afghanistan"),
    TSLM(Population ~ Year)
  )
)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -5794518 -2582559 744761 2259222 6036280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -829292529 49730866 -16.68 <2e-16 ***
## Year 425774 25008 17.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3188000 on 56 degrees of freedom
## Multiple R-squared: 0.8381, Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16
# Same linear trend, restricted to the years before 1980.
report(
  model(
    filter(global_economy, Country == "Afghanistan", Year < 1980),
    TSLM(Population ~ Year)
  )
)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -146380.8 -110290.6 -451.2 105877.8 202881.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -470734527 8787062 -53.57 <2e-16 ***
## Year 244657 4462 54.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 115100 on 18 degrees of freedom
## Multiple R-squared: 0.994, Adjusted R-squared: 0.9937
## F-statistic: 3007 on 1 and 18 DF, p-value: < 2.22e-16
# Same linear trend, restricted to the years after 1989.
report(
  model(
    filter(global_economy, Country == "Afghanistan", Year > 1989),
    TSLM(Population ~ Year)
  )
)
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -619234 -212927 6598 234280 612277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.670e+09 1.640e+07 -101.8 <2e-16 ***
## Year 8.451e+05 8.184e+03 103.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 349800 on 26 degrees of freedom
## Multiple R-squared: 0.9976, Adjusted R-squared: 0.9975
## F-statistic: 1.066e+04 on 1 and 26 DF, p-value: < 2.22e-16
# Afghanistan-only slice of global_economy, shared by the models below.
globalboi <- filter(global_economy, Country == "Afghanistan")

# m1: one linear trend over the full sample.
m1 <- model(globalboi, TSLM(Population ~ Year))

# m2: linear trend fitted only to the years after 1989.
m2 <- model(
  filter(globalboi, Year > 1989),
  TSLM(Population ~ Year)
)

# Piecewise-linear trend whose slope is allowed to change at 1980 and 1989.
afgh.pfit <- model(
  globalboi,
  piecewise = TSLM(Population ~ trend(knots = c(1980, 1989)))
)
afgh.pfit %>% report()
## Series: Population
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -577590 -174198 -16784 187226 679947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8697573 131122 66.33 <2e-16 ***
## trend(knots = c(1980, 1989))trend 224372 9623 23.32 <2e-16 ***
## trend(knots = c(1980, 1989))trend_21 -456804 24498 -18.65 <2e-16 ***
## trend(knots = c(1980, 1989))trend_30 1082782 21418 50.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 300900 on 54 degrees of freedom
## Multiple R-squared: 0.9986, Adjusted R-squared: 0.9985
## F-statistic: 1.293e+04 on 3 and 54 DF, p-value: < 2.22e-16
# Residual diagnostics for the piecewise-trend model.
gg_tsresiduals(afgh.pfit)
# Five-year-ahead forecasts from the full-sample linear model.
m1 %>% forecast(h = 5)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Afghanistan TSLM(Population ~ Year) 2018 N(3e+07, 1.1e+13) 29919575.
## 2 Afghanistan TSLM(Population ~ Year) 2019 N(3e+07, 1.1e+13) 30345349.
## 3 Afghanistan TSLM(Population ~ Year) 2020 N(3.1e+07, 1.1e+13) 30771123.
## 4 Afghanistan TSLM(Population ~ Year) 2021 N(3.1e+07, 1.1e+13) 31196897.
## 5 Afghanistan TSLM(Population ~ Year) 2022 N(3.2e+07, 1.1e+13) 31622671.
# Five-year-ahead forecasts from the linear model fitted to the years after 1989.
m2 %>% forecast(h = 5)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Afghanistan TSLM(Population ~ Year) 2018 N(3.6e+07, 1.4e+11) 35925602.
## 2 Afghanistan TSLM(Population ~ Year) 2019 N(3.7e+07, 1.4e+11) 36770747.
## 3 Afghanistan TSLM(Population ~ Year) 2020 N(3.8e+07, 1.4e+11) 37615892.
## 4 Afghanistan TSLM(Population ~ Year) 2021 N(3.8e+07, 1.5e+11) 38461037.
## 5 Afghanistan TSLM(Population ~ Year) 2022 N(3.9e+07, 1.5e+11) 39306182.
# Random walk with drift, forecast five years ahead and drawn over the
# observed Afghanistan series. The result is a ggplot object stored in
# afghan_drift and printed on the last line.
# The Population column is plotted as raw head counts (values around 3e7), so
# the axis is labelled "Population" rather than "Population in Millions".
afghan_drift <- globalboi %>%
  model(RW(Population ~ drift())) %>%
  forecast(h = "5 years") %>%
  autoplot(globalboi) +
  labs(
    title = "Drift Forecast of Afghanistan Population",
    y = "Population"
  )
afghan_drift
A drift method works best given the shape of the data: the series has no seasonality and grows steadily, so the drift fits it well. I think this is a fairly accurate forecast.
No perfect Multicollinearity
The sum of errors is zero
Errors have a constant variance σ² (homoscedasticity)
Error terms are not autocorrelated
Errors are unrelated to the predictor variables
Errors follow a normal distribution
A consistent estimator means that as the sample size gets larger, the estimate gets closer and closer to the true value. An estimator is unbiased if its expected value is equal to the true value. This means that whether an estimator is unbiased isn’t affected by the size of the sample.
R squared measures the percentage of the variation in the response variable that your model can explain. Adjusted R squared is similar to R squared except that it adjusts for degrees of freedom, so adding predictors that do not improve the fit lowers it.