library(fpp3)
#a
vic_elec
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
## 7 2012-01-01 03:00:00 3694. 20.1 2012-01-01 TRUE
## 8 2012-01-01 03:30:00 3562. 19.6 2012-01-01 TRUE
## 9 2012-01-01 04:00:00 3433. 19.1 2012-01-01 TRUE
## 10 2012-01-01 04:30:00 3359. 19.0 2012-01-01 TRUE
## # ... with 52,598 more rows
## # i Use `print(n = ...)` to see more rows
Janelec <- vic_elec %>%
filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
index_by(Date = as_date(Time)) %>%
summarise(
Demand = sum(Demand),
Temperature = max(Temperature))
Janelec %>%
gather("Variable", "Value", Demand, Temperature) %>%
ggplot(aes(x = Date, y = Value)) +
geom_line() +
facet_grid(vars(Variable), scales = "free_y") +
labs(title = "Electricity Demand")
Janelec %>%
ggplot(aes(x=Temperature, y=Demand)) +
geom_point() +
geom_smooth(method="lm", se=FALSE) +
labs(title = "Electricity Demand")
## `geom_smooth()` using formula 'y ~ x'
#The demand for electricity likely gets higher as the temperature does due to an increased need for air conditioning. With hotter weather people are going to use more electricity to cool down their houses.
#b
elecfit <- Janelec %>%
model(TSLM(Demand ~ Temperature))
elecfit %>% gg_tsresiduals()
#The model doesn't appear to have autocorrelation, the normal distribution is pretty rough, and the residuals aren't bad there's no apparent trend. The model is adequate. It looks like there is an outlier at January 21st that's very clear from the residuals and from the histogram.
#c
demand15 <-Janelec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(Janelec, 1) %>%
mutate(Temperature = 15)) %>%
autoplot(Janelec)
demand15
demand35 <-Janelec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(Janelec, 1) %>%
mutate(Temperature = 35)) %>%
autoplot(Janelec)
demand35
#The forecast for 35 degrees looks feasible. The forecast for 15 degrees is harder to believe because the data has no record of it being that cold. Hard to say if it would be feasible or not.
#d
demand.15.hilo <- Janelec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(new_data(Janelec, 1) %>%
mutate(Temperature = 15)) %>%
hilo()
demand.15.hilo
## # A tsibble: 1 x 7 [1D]
## # Key: .model [1]
## .model Date Demand .mean Tempe~1 `80%`
## <chr> <date> <dist> <dbl> <dbl> <hilo>
## 1 TSLM(Dema~ 2014-02-01 N(151398, 6.8e+08) 1.51e5 15 [117908.1, 184888.6]80
## # ... with 1 more variable: `95%` <hilo>, and abbreviated variable name
## # 1: Temperature
## # i Use `colnames()` to see all variable names
demand.35.hilo <- Janelec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(new_data(Janelec, 1) %>%
mutate(Temperature = 35)) %>%
hilo()
demand.35.hilo
## # A tsibble: 1 x 7 [1D]
## # Key: .model [1]
## .model Date Demand .mean Tempe~1 `80%`
## <chr> <date> <dist> <dbl> <dbl> <hilo>
## 1 TSLM(Dema~ 2014-02-01 N(274484, 6.4e+08) 2.74e5 35 [242088.4, 306880.1]80
## # ... with 1 more variable: `95%` <hilo>, and abbreviated variable name
## # 1: Temperature
## # i Use `colnames()` to see all variable names
#e
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temp")
#There is a high demand for electricity midst very high temperatures, but a relatively constant demand for electricity when it's between 0-30 degrees.
olympic_running
## # A tsibble: 312 x 4 [4Y]
## # Key: Length, Sex [14]
## Year Length Sex Time
## <int> <int> <chr> <dbl>
## 1 1896 100 men 12
## 2 1900 100 men 11
## 3 1904 100 men 11
## 4 1908 100 men 10.8
## 5 1912 100 men 10.8
## 6 1916 100 men NA
## 7 1920 100 men 10.8
## 8 1924 100 men 10.6
## 9 1928 100 men 10.8
## 10 1932 100 men 10.3
## # ... with 302 more rows
## # i Use `print(n = ...)` to see more rows
#a
autoplot(olympic_running) +
facet_wrap(~Length + Sex, scales ="free") +
labs(title = "Running", x= "Year", y= "Time") + theme(legend.position = "none")
## Plot variable not specified, automatically selected `.vars = Time`
#Times have clearly been decreasing over time in most events.
#b
regline <- lm(Time ~ ., olympic_running)
regline
##
## Call:
## lm(formula = Time ~ ., data = olympic_running)
##
## Coefficients:
## (Intercept) Year Length Sexwomen
## 734.9863 -0.3905 0.1766 32.9732
# Average times as a whole across all events have been decreasing by .39 seconds each year.
#c
library("forecast")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
checkresiduals(regline)
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals
## LM test = 179.8, df = 10, p-value < 2.2e-16
unloadNamespace("forecast")
#This model looks horrible to be honest. The autocorrelation is all over the place and the residuals do not resemble white noise at all. The normal distribution has the most hope but it still looks pretty bad.
#d
olympics <- olympic_running
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="100") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="200") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="400") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="800") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="1500") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="men") %>%
filter(Length=="5000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="100") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="200") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="400") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="800") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="1500") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="5000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
olympic_running %>%
filter(Sex=="women") %>%
filter(Length=="10000") %>%
model(TSLM(Time ~ Year)) %>%
forecast(
new_data(olympic_running, 1) %>%
mutate(Year = 2020)
) %>%
autoplot(olympic_running) +
labs(title = "Forecasted Winning Race Time") +
theme(plot.title = element_text(hjust = 0.5))
#The prediction intervals are very broad especially for the distance event, so the likelihood of a winning time falling outside the interval is very slim. The assumption made is that times will continue to decrease (I don't think they will though).
souv <- souvenirs
#a
autoplot(souvenirs)
## Plot variable not specified, automatically selected `.vars = Sales`
#There is clear seasonality every December I'm assuming the holiday season is why. The seasonality seemed pretty constant until 1993 and 1994 when it shot up significantly for reasons I can not identify.
#b
souvlog <- souv %>%
mutate(Sales = log(Sales))
autoplot(souvlog)
## Plot variable not specified, automatically selected `.vars = Sales`
#The log serves to fix the variance in the seasonality. Fixes heteroskedasticity.
#c
souvenir1 <- ts(souvenirs$Sales,start = c(1987,1), end = c(1993,6), frequency=12)
souvenir_log2 <- log(souvenir1)
dummy_surf_fest <- rep(0, length(souvenir1))
dummy_surf_fest[seq_along(dummy_surf_fest)%%12 == 3] = 1
dummy_surf_fest[3] = 0
dummy_surf_fest <- ts(dummy_surf_fest, freq = 12, start = c(1987,1))
souvenir_final <- data.frame(souvenir_log2, dummy_surf_fest)
library(forecast)
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
souv.fit <- tslm(souvenir_log2 ~ trend + season + dummy_surf_fest, data = souvenir_final)
souv.forecast <- data.frame(dummy_surf_fest = rep(0, 12))
souv.forecast[3,] = 1
forecast(souv.fit, newdata = souv.forecast)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 1993 9.883136 9.637329 10.128943 9.503919 10.262353
## Aug 1993 9.867772 9.621965 10.113579 9.488555 10.246989
## Sep 1993 10.531942 10.191069 10.872815 10.006062 11.057822
## Oct 1993 10.092605 9.846798 10.338412 9.713388 10.471822
## Nov 1993 10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993 11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994 9.430933 9.186093 9.675774 9.053207 9.808659
## Feb 1994 9.704370 9.459530 9.949210 9.326644 10.082096
## Mar 1994 9.695742 9.365756 10.025727 9.186658 10.204825
## Apr 1994 9.881046 9.636206 10.125887 9.503320 10.258772
## May 1994 9.928500 9.683659 10.173340 9.550774 10.306226
## Jun 1994 9.989861 9.745020 10.234701 9.612135 10.367587
#d
plot(souv.fit$residuals)
#I would say these residuals resemble white noise well. There's no apparent seasonality or trend.
#e
boxplot(residuals(souv.fit)~cycle(residuals(souv.fit)))
#The boxplot reveals some problems with homoskedasticity in this model. The variation in the residuals is changing.
#f
souv.fit$coefficients
## (Intercept) trend season2 season3 season4
## 7.6662400 0.0207611 0.2526756 0.2232860 0.3878297
## season5 season6 season7 season8 season9
## 0.4145219 0.4551219 0.5767690 0.5406444 0.6296713
## season10 season11 season12 dummy_surf_fest
## 0.7239552 1.1957774 1.9473841 0.5543818
#The coefficients show correlation with the variables. Season 11 and season 12 have the strongest correlation and thus the highest impact on souvenir sales.
#g
checkresiduals(souv.fit)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 31.535, df = 17, p-value = 0.01718
# The L-jung box test is used to tell whether your residuals are statistically different from white noise. The lower the yielded value is the less we can say it is statistically different from white noise.
#h
souv.forecast2 <- data.frame(dummy_surf_fest = rep(0, 36))
souv.forecast3 <- forecast(souv.fit, newdata = souv.forecast2)
souv.forecast3
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 1993 9.883136 9.637329 10.128943 9.503919 10.262353
## Aug 1993 9.867772 9.621965 10.113579 9.488555 10.246989
## Sep 1993 9.977560 9.731753 10.223367 9.598343 10.356777
## Oct 1993 10.092605 9.846798 10.338412 9.713388 10.471822
## Nov 1993 10.585189 10.339382 10.830995 10.205971 10.964406
## Dec 1993 11.357556 11.111749 11.603363 10.978339 11.736773
## Jan 1994 9.430933 9.186093 9.675774 9.053207 9.808659
## Feb 1994 9.704370 9.459530 9.949210 9.326644 10.082096
## Mar 1994 9.695742 9.365756 10.025727 9.186658 10.204825
## Apr 1994 9.881046 9.636206 10.125887 9.503320 10.258772
## May 1994 9.928500 9.683659 10.173340 9.550774 10.306226
## Jun 1994 9.989861 9.745020 10.234701 9.612135 10.367587
## Jul 1994 10.132269 9.883394 10.381144 9.748319 10.516219
## Aug 1994 10.116905 9.868031 10.365780 9.732955 10.500856
## Sep 1994 10.226693 9.977819 10.475568 9.842743 10.610644
## Oct 1994 10.341738 10.092864 10.590613 9.957788 10.725689
## Nov 1994 10.834322 10.585447 11.083197 10.450372 11.218272
## Dec 1994 11.606690 11.357815 11.855564 11.222739 11.990640
## Jan 1995 9.680067 9.431764 9.928369 9.296999 10.063134
## Feb 1995 9.953503 9.705201 10.201806 9.570436 10.336570
## Mar 1995 9.944875 9.610605 10.279144 9.429182 10.460567
## Apr 1995 10.130180 9.881877 10.378482 9.747112 10.513247
## May 1995 10.177633 9.929330 10.425935 9.794566 10.560700
## Jun 1995 10.238994 9.990691 10.487296 9.855927 10.622061
## Jul 1995 10.381402 10.128745 10.634059 9.991617 10.771188
## Aug 1995 10.366039 10.113381 10.618696 9.976253 10.755824
## Sep 1995 10.475827 10.223169 10.728484 10.086041 10.865612
## Oct 1995 10.590872 10.338214 10.843529 10.201086 10.980657
## Nov 1995 11.083455 10.830798 11.336112 10.693669 11.473240
## Dec 1995 11.855823 11.603165 12.108480 11.466037 12.245608
## Jan 1996 9.929200 9.676730 10.181669 9.539704 10.318696
## Feb 1996 10.202636 9.950167 10.455106 9.813141 10.592132
## Mar 1996 10.194008 9.854949 10.533067 9.670926 10.717090
## Apr 1996 10.379313 10.126843 10.631782 9.989817 10.768809
## May 1996 10.426766 10.174296 10.679236 10.037270 10.816262
## Jun 1996 10.488127 10.235658 10.740597 10.098631 10.877623
autoplot(souv.forecast3)
unloadNamespace("forecast")
#i
#A box cox could certainly help address the issue with heteroskedasticity and smooth out the variance in the residuals. Check the variables and see if the addition of a sqaured term could help the model.
us_gasoline
## # A tsibble: 1,355 x 2 [1W]
## Week Barrels
## <week> <dbl>
## 1 1991 W06 6.62
## 2 1991 W07 6.43
## 3 1991 W08 6.58
## 4 1991 W09 7.22
## 5 1991 W10 6.88
## 6 1991 W11 6.95
## 7 1991 W12 7.33
## 8 1991 W13 6.78
## 9 1991 W14 7.50
## 10 1991 W15 6.92
## # ... with 1,345 more rows
## # i Use `print(n = ...)` to see more rows
gas.filter <- us_gasoline %>%
filter(year(Week) < 2005)
fourier.gas1 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=1)))
report(fourier.gas1)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.969489 -0.197166 -0.002252 0.200869 0.975792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.092e+00 2.131e-02 332.782 < 2e-16 ***
## trend() 2.807e-03 5.081e-05 55.237 < 2e-16 ***
## fourier(K = 1)C1_52 -1.238e-01 1.505e-02 -8.226 9.01e-16 ***
## fourier(K = 1)S1_52 -2.383e-01 1.505e-02 -15.832 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2865 on 722 degrees of freedom
## Multiple R-squared: 0.8248, Adjusted R-squared: 0.8241
## F-statistic: 1133 on 3 and 722 DF, p-value: < 2.22e-16
fourier.gas1 %>%
forecast(h=52) %>%
autoplot(gas.filter)
fourier.gas2 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=2)))
report(fourier.gas2)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9375162 -0.1897569 -0.0006692 0.2058275 1.0016928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+00 2.121e-02 334.493 < 2e-16 ***
## trend() 2.802e-03 5.057e-05 55.420 < 2e-16 ***
## fourier(K = 2)C1_52 -1.237e-01 1.497e-02 -8.265 6.71e-16 ***
## fourier(K = 2)S1_52 -2.383e-01 1.497e-02 -15.917 < 2e-16 ***
## fourier(K = 2)C2_52 4.493e-02 1.495e-02 3.006 0.00274 **
## fourier(K = 2)S2_52 1.054e-02 1.498e-02 0.704 0.48193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 720 degrees of freedom
## Multiple R-squared: 0.8271, Adjusted R-squared: 0.8259
## F-statistic: 688.8 on 5 and 720 DF, p-value: < 2.22e-16
fourier.gas2 %>%
forecast(h=52) %>%
autoplot(gas.filter)
fourier.gas5 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=5)))
report(fourier.gas5)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8568361 -0.1759670 -0.0002609 0.1916623 0.9380241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 2.043e-02 347.198 < 2e-16 ***
## trend() 2.798e-03 4.873e-05 57.428 < 2e-16 ***
## fourier(K = 5)C1_52 -1.242e-01 1.442e-02 -8.610 < 2e-16 ***
## fourier(K = 5)S1_52 -2.390e-01 1.442e-02 -16.570 < 2e-16 ***
## fourier(K = 5)C2_52 4.517e-02 1.440e-02 3.137 0.00178 **
## fourier(K = 5)S2_52 9.760e-03 1.443e-02 0.676 0.49898
## fourier(K = 5)C3_52 9.586e-02 1.442e-02 6.646 6e-11 ***
## fourier(K = 5)S3_52 2.543e-04 1.440e-02 0.018 0.98592
## fourier(K = 5)C4_52 2.854e-02 1.442e-02 1.979 0.04821 *
## fourier(K = 5)S4_52 2.861e-02 1.440e-02 1.987 0.04733 *
## fourier(K = 5)C5_52 -3.364e-02 1.440e-02 -2.336 0.01974 *
## fourier(K = 5)S5_52 3.123e-02 1.443e-02 2.165 0.03073 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2746 on 714 degrees of freedom
## Multiple R-squared: 0.8409, Adjusted R-squared: 0.8384
## F-statistic: 343 on 11 and 714 DF, p-value: < 2.22e-16
fourier.gas5 %>%
forecast(h=52) %>%
autoplot(gas.filter)
fourier.gas10 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=10)))
report(fourier.gas10)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8625657 -0.1755251 0.0003646 0.1817594 0.9806206
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+00 2.002e-02 354.363 < 2e-16 ***
## trend() 2.799e-03 4.774e-05 58.625 < 2e-16 ***
## fourier(K = 10)C1_52 -1.245e-01 1.413e-02 -8.814 < 2e-16 ***
## fourier(K = 10)S1_52 -2.395e-01 1.413e-02 -16.945 < 2e-16 ***
## fourier(K = 10)C2_52 4.529e-02 1.411e-02 3.210 0.00139 **
## fourier(K = 10)S2_52 9.203e-03 1.414e-02 0.651 0.51524
## fourier(K = 10)C3_52 9.636e-02 1.413e-02 6.819 1.98e-11 ***
## fourier(K = 10)S3_52 -4.035e-06 1.411e-02 0.000 0.99977
## fourier(K = 10)C4_52 2.905e-02 1.413e-02 2.056 0.04015 *
## fourier(K = 10)S4_52 2.884e-02 1.411e-02 2.044 0.04134 *
## fourier(K = 10)C5_52 -3.349e-02 1.410e-02 -2.375 0.01783 *
## fourier(K = 10)S5_52 3.176e-02 1.413e-02 2.247 0.02494 *
## fourier(K = 10)C6_52 -6.569e-02 1.412e-02 -4.653 3.91e-06 ***
## fourier(K = 10)S6_52 2.815e-02 1.412e-02 1.993 0.04660 *
## fourier(K = 10)C7_52 -2.240e-02 1.413e-02 -1.585 0.11340
## fourier(K = 10)S7_52 3.279e-02 1.411e-02 2.324 0.02038 *
## fourier(K = 10)C8_52 -1.671e-02 1.411e-02 -1.184 0.23676
## fourier(K = 10)S8_52 -1.432e-03 1.412e-02 -0.101 0.91926
## fourier(K = 10)C9_52 -1.768e-02 1.411e-02 -1.253 0.21066
## fourier(K = 10)S9_52 -6.335e-04 1.413e-02 -0.045 0.96424
## fourier(K = 10)C10_52 1.274e-02 1.412e-02 0.902 0.36751
## fourier(K = 10)S10_52 -2.368e-02 1.411e-02 -1.678 0.09387 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.269 on 704 degrees of freedom
## Multiple R-squared: 0.8494, Adjusted R-squared: 0.8449
## F-statistic: 189.1 on 21 and 704 DF, p-value: < 2.22e-16
fourier.gas10 %>%
forecast(h=52) %>%
autoplot(gas.filter)
fourier.gas11 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=11)))
report(fourier.gas11) %>%
forecast(h=52) %>%
autoplot(gas.filter)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.896208 -0.177835 0.003786 0.173355 1.014511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.717 < 2e-16 ***
## trend() 2.799e-03 4.756e-05 58.844 < 2e-16 ***
## fourier(K = 11)C1_52 -1.245e-01 1.407e-02 -8.845 < 2e-16 ***
## fourier(K = 11)S1_52 -2.394e-01 1.408e-02 -17.007 < 2e-16 ***
## fourier(K = 11)C2_52 4.529e-02 1.405e-02 3.223 0.00133 **
## fourier(K = 11)S2_52 9.256e-03 1.408e-02 0.657 0.51125
## fourier(K = 11)C3_52 9.632e-02 1.408e-02 6.842 1.70e-11 ***
## fourier(K = 11)S3_52 3.762e-05 1.405e-02 0.003 0.99787
## fourier(K = 11)C4_52 2.899e-02 1.408e-02 2.060 0.03980 *
## fourier(K = 11)S4_52 2.884e-02 1.406e-02 2.052 0.04054 *
## fourier(K = 11)C5_52 -3.354e-02 1.405e-02 -2.387 0.01725 *
## fourier(K = 11)S5_52 3.172e-02 1.408e-02 2.253 0.02458 *
## fourier(K = 11)C6_52 -6.569e-02 1.406e-02 -4.671 3.59e-06 ***
## fourier(K = 11)S6_52 2.808e-02 1.407e-02 1.996 0.04628 *
## fourier(K = 11)C7_52 -2.235e-02 1.408e-02 -1.588 0.11283
## fourier(K = 11)S7_52 3.274e-02 1.405e-02 2.330 0.02010 *
## fourier(K = 11)C8_52 -1.664e-02 1.406e-02 -1.183 0.23704
## fourier(K = 11)S8_52 -1.428e-03 1.407e-02 -0.102 0.91916
## fourier(K = 11)C9_52 -1.763e-02 1.406e-02 -1.254 0.21019
## fourier(K = 11)S9_52 -5.731e-04 1.407e-02 -0.041 0.96753
## fourier(K = 11)C10_52 1.272e-02 1.407e-02 0.904 0.36621
## fourier(K = 11)S10_52 -2.359e-02 1.406e-02 -1.678 0.09375 .
## fourier(K = 11)C11_52 -2.837e-02 1.407e-02 -2.016 0.04414 *
## fourier(K = 11)S11_52 2.555e-02 1.406e-02 1.817 0.06962 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.268 on 702 degrees of freedom
## Multiple R-squared: 0.851, Adjusted R-squared: 0.8461
## F-statistic: 174.3 on 23 and 702 DF, p-value: < 2.22e-16
fourier.gas20 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=20)))
report(fourier.gas20)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8455538 -0.1703796 0.0009278 0.1724455 0.9995200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 2.001e-02 354.550 < 2e-16 ***
## trend() 2.798e-03 4.772e-05 58.649 < 2e-16 ***
## fourier(K = 20)C1_52 -1.245e-01 1.412e-02 -8.814 < 2e-16 ***
## fourier(K = 20)S1_52 -2.394e-01 1.413e-02 -16.946 < 2e-16 ***
## fourier(K = 20)C2_52 4.525e-02 1.410e-02 3.209 0.00139 **
## fourier(K = 20)S2_52 9.317e-03 1.413e-02 0.659 0.50988
## fourier(K = 20)C3_52 9.624e-02 1.413e-02 6.814 2.09e-11 ***
## fourier(K = 20)S3_52 3.202e-05 1.410e-02 0.002 0.99819
## fourier(K = 20)C4_52 2.895e-02 1.412e-02 2.050 0.04072 *
## fourier(K = 20)S4_52 2.877e-02 1.410e-02 2.040 0.04175 *
## fourier(K = 20)C5_52 -3.349e-02 1.410e-02 -2.376 0.01779 *
## fourier(K = 20)S5_52 3.165e-02 1.413e-02 2.240 0.02541 *
## fourier(K = 20)C6_52 -6.559e-02 1.411e-02 -4.649 4.01e-06 ***
## fourier(K = 20)S6_52 2.808e-02 1.411e-02 1.990 0.04701 *
## fourier(K = 20)C7_52 -2.229e-02 1.413e-02 -1.578 0.11498
## fourier(K = 20)S7_52 3.283e-02 1.410e-02 2.329 0.02018 *
## fourier(K = 20)C8_52 -1.668e-02 1.411e-02 -1.183 0.23735
## fourier(K = 20)S8_52 -1.323e-03 1.412e-02 -0.094 0.92536
## fourier(K = 20)C9_52 -1.775e-02 1.410e-02 -1.259 0.20862
## fourier(K = 20)S9_52 -5.484e-04 1.412e-02 -0.039 0.96903
## fourier(K = 20)C10_52 1.263e-02 1.412e-02 0.894 0.37140
## fourier(K = 20)S10_52 -2.368e-02 1.411e-02 -1.679 0.09360 .
## fourier(K = 20)C11_52 -2.835e-02 1.411e-02 -2.008 0.04500 *
## fourier(K = 20)S11_52 2.542e-02 1.411e-02 1.801 0.07208 .
## fourier(K = 20)C12_52 -1.166e-02 1.411e-02 -0.827 0.40869
## fourier(K = 20)S12_52 -2.505e-02 1.411e-02 -1.775 0.07633 .
## fourier(K = 20)C13_52 6.269e-03 1.411e-02 0.444 0.65703
## fourier(K = 20)S13_52 1.421e-02 1.411e-02 1.007 0.31442
## fourier(K = 20)C14_52 -3.088e-03 1.411e-02 -0.219 0.82685
## fourier(K = 20)S14_52 1.816e-02 1.411e-02 1.287 0.19865
## fourier(K = 20)C15_52 -1.403e-03 1.411e-02 -0.099 0.92086
## fourier(K = 20)S15_52 1.743e-02 1.411e-02 1.235 0.21708
## fourier(K = 20)C16_52 -1.224e-02 1.412e-02 -0.867 0.38624
## fourier(K = 20)S16_52 -1.567e-03 1.411e-02 -0.111 0.91160
## fourier(K = 20)C17_52 7.639e-03 1.410e-02 0.542 0.58827
## fourier(K = 20)S17_52 -2.197e-02 1.412e-02 -1.556 0.12021
## fourier(K = 20)C18_52 2.355e-03 1.411e-02 0.167 0.86749
## fourier(K = 20)S18_52 1.760e-02 1.412e-02 1.247 0.21299
## fourier(K = 20)C19_52 -2.098e-03 1.412e-02 -0.149 0.88197
## fourier(K = 20)S19_52 -7.173e-05 1.410e-02 -0.005 0.99594
## fourier(K = 20)C20_52 -2.844e-03 1.411e-02 -0.202 0.84034
## fourier(K = 20)S20_52 7.698e-04 1.411e-02 0.055 0.95652
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2688 on 684 degrees of freedom
## Multiple R-squared: 0.8538, Adjusted R-squared: 0.8451
## F-statistic: 97.46 on 41 and 684 DF, p-value: < 2.22e-16
fourier.gas20 %>%
forecast(h=52) %>%
autoplot(gas.filter)
fourier.gas26 <- gas.filter %>%
model(TSLM(Barrels ~ trend() + fourier(K=26)))
report(fourier.gas26)
## Series: Barrels
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.834099 -0.172836 0.001292 0.167365 1.034800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.095e+00 1.994e-02 355.756 < 2e-16 ***
## trend() 2.799e-03 4.755e-05 58.858 < 2e-16 ***
## fourier(K = 26)C1_52 -1.244e-01 1.407e-02 -8.843 < 2e-16 ***
## fourier(K = 26)S1_52 -2.394e-01 1.408e-02 -17.004 < 2e-16 ***
## fourier(K = 26)C2_52 4.527e-02 1.405e-02 3.222 0.00134 **
## fourier(K = 26)S2_52 9.340e-03 1.408e-02 0.663 0.50740
## fourier(K = 26)C3_52 9.624e-02 1.408e-02 6.837 1.81e-11 ***
## fourier(K = 26)S3_52 7.369e-05 1.405e-02 0.005 0.99582
## fourier(K = 26)C4_52 2.892e-02 1.407e-02 2.055 0.04031 *
## fourier(K = 26)S4_52 2.880e-02 1.405e-02 2.050 0.04079 *
## fourier(K = 26)C5_52 -3.356e-02 1.405e-02 -2.389 0.01719 *
## fourier(K = 26)S5_52 3.164e-02 1.408e-02 2.247 0.02494 *
## fourier(K = 26)C6_52 -6.564e-02 1.406e-02 -4.668 3.67e-06 ***
## fourier(K = 26)S6_52 2.802e-02 1.407e-02 1.992 0.04676 *
## fourier(K = 26)C7_52 -2.227e-02 1.408e-02 -1.582 0.11409
## fourier(K = 26)S7_52 3.274e-02 1.405e-02 2.330 0.02009 *
## fourier(K = 26)C8_52 -1.659e-02 1.406e-02 -1.180 0.23839
## fourier(K = 26)S8_52 -1.368e-03 1.407e-02 -0.097 0.92254
## fourier(K = 26)C9_52 -1.765e-02 1.406e-02 -1.255 0.20977
## fourier(K = 26)S9_52 -4.998e-04 1.407e-02 -0.036 0.97168
## fourier(K = 26)C10_52 1.266e-02 1.407e-02 0.900 0.36859
## fourier(K = 26)S10_52 -2.356e-02 1.406e-02 -1.676 0.09419 .
## fourier(K = 26)C11_52 -2.843e-02 1.407e-02 -2.021 0.04366 *
## fourier(K = 26)S11_52 2.553e-02 1.406e-02 1.815 0.06991 .
## fourier(K = 26)C12_52 -1.181e-02 1.406e-02 -0.840 0.40118
## fourier(K = 26)S12_52 -2.505e-02 1.407e-02 -1.781 0.07539 .
## fourier(K = 26)C13_52 6.166e-03 1.406e-02 0.438 0.66119
## fourier(K = 26)S13_52 1.409e-02 1.406e-02 1.002 0.31687
## fourier(K = 26)C14_52 -3.056e-03 1.406e-02 -0.217 0.82802
## fourier(K = 26)S14_52 1.800e-02 1.407e-02 1.279 0.20120
## fourier(K = 26)C15_52 -1.247e-03 1.407e-02 -0.089 0.92937
## fourier(K = 26)S15_52 1.735e-02 1.406e-02 1.234 0.21766
## fourier(K = 26)C16_52 -1.207e-02 1.407e-02 -0.858 0.39117
## fourier(K = 26)S16_52 -1.492e-03 1.406e-02 -0.106 0.91550
## fourier(K = 26)C17_52 7.686e-03 1.406e-02 0.547 0.58466
## fourier(K = 26)S17_52 -2.178e-02 1.407e-02 -1.548 0.12210
## fourier(K = 26)C18_52 2.236e-03 1.406e-02 0.159 0.87369
## fourier(K = 26)S18_52 1.775e-02 1.407e-02 1.262 0.20739
## fourier(K = 26)C19_52 -2.301e-03 1.408e-02 -0.163 0.87021
## fourier(K = 26)S19_52 -6.677e-05 1.405e-02 -0.005 0.99621
## fourier(K = 26)C20_52 -2.977e-03 1.406e-02 -0.212 0.83240
## fourier(K = 26)S20_52 6.098e-04 1.407e-02 0.043 0.96543
## fourier(K = 26)C21_52 6.089e-03 1.405e-02 0.433 0.66483
## fourier(K = 26)S21_52 3.927e-03 1.408e-02 0.279 0.78039
## fourier(K = 26)C22_52 -8.038e-04 1.407e-02 -0.057 0.95447
## fourier(K = 26)S22_52 -1.126e-02 1.405e-02 -0.801 0.42323
## fourier(K = 26)C23_52 -2.159e-02 1.408e-02 -1.534 0.12551
## fourier(K = 26)S23_52 1.192e-02 1.405e-02 0.848 0.39656
## fourier(K = 26)C24_52 7.003e-04 1.405e-02 0.050 0.96025
## fourier(K = 26)S24_52 1.837e-02 1.408e-02 1.305 0.19241
## fourier(K = 26)C25_52 4.907e-03 1.406e-02 0.349 0.72717
## fourier(K = 26)S25_52 7.445e-03 1.407e-02 0.529 0.59686
## fourier(K = 26)C26_52 -3.084e-02 9.944e-03 -3.101 0.00201 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2679 on 673 degrees of freedom
## Multiple R-squared: 0.8572, Adjusted R-squared: 0.8461
## F-statistic: 77.67 on 52 and 673 DF, p-value: < 2.22e-16
fourier.gas26 %>%
forecast(h=52) %>%
autoplot(gas.filter)
# Where K=11 looks to be the best value to use. It get's better up to K=11 and then get's worse after and that's clear on the plots. I went ahead and included all of the forecasts here to see which one yielde dthe best result and it's clear its where K=11.
#b
gas.fit <- gas.filter %>%
model(K1 = TSLM(Barrels ~ trend() + fourier(K=1)),
K2 = TSLM(Barrels ~ trend() + fourier(K=2)),
K5 = TSLM(Barrels ~ trend() + fourier(K=5)),
K10 = TSLM(Barrels ~ trend() + fourier(K=10)),
K11 = TSLM(Barrels ~ trend() + fourier(K=11)),
K20 = TSLM(Barrels ~ trend() + fourier(K=20)),
K26 = TSLM(Barrels ~ trend() + fourier(K=26)))
glance(gas.fit) %>%
select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 7 x 4
## .model r_squared adj_r_squared AICc
## <chr> <dbl> <dbl> <dbl>
## 1 K1 0.825 0.824 -1809.
## 2 K2 0.827 0.826 -1814.
## 3 K5 0.841 0.838 -1862.
## 4 K10 0.849 0.845 -1881.
## 5 K11 0.851 0.846 -1885.
## 6 K20 0.854 0.845 -1859.
## 7 K26 0.857 0.846 -1851.
#We should stop at K=11 because that's where the AICc starts to increase.
#c
fourier.gas11 %>% gg_tsresiduals()
augment(fourier.gas11) %>%
features(.resid,ljung_box, lag =104, dof=0)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 TSLM(Barrels ~ trend() + fourier(K = 11)) 149. 0.00268
#d
fourier.gas11 %>%
forecast(h=52) %>%
autoplot(gas.filter)
# The forecast looks very good. It fits the increasing trend and seasonality.
#a
global_economy
## # A tsibble: 15,150 x 9 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
## 7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
## 8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
## 9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
## 10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
## # ... with 15,140 more rows
## # i Use `print(n = ...)` to see more rows
autoplot(global_economy)
## Plot variable not specified, automatically selected `.vars = GDP`
## Warning: Removed 3242 row(s) containing missing values (geom_path).
global_econ1 <- global_economy %>%
filter(Country == "Afghanistan") %>%
mutate(Population.in.millions = Population/1000000)
global_econ1
## # A tsibble: 58 x 10 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Popul~1 Popul~2
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 5.38e8 NA NA 7.02 4.13 9.00e6 9.00
## 2 Afghanistan AFG 1961 5.49e8 NA NA 8.10 4.45 9.17e6 9.17
## 3 Afghanistan AFG 1962 5.47e8 NA NA 9.35 4.88 9.35e6 9.35
## 4 Afghanistan AFG 1963 7.51e8 NA NA 16.9 9.17 9.53e6 9.53
## 5 Afghanistan AFG 1964 8.00e8 NA NA 18.1 8.89 9.73e6 9.73
## 6 Afghanistan AFG 1965 1.01e9 NA NA 21.4 11.3 9.94e6 9.94
## 7 Afghanistan AFG 1966 1.40e9 NA NA 18.6 8.57 1.02e7 10.2
## 8 Afghanistan AFG 1967 1.67e9 NA NA 14.2 6.77 1.04e7 10.4
## 9 Afghanistan AFG 1968 1.37e9 NA NA 15.2 8.90 1.06e7 10.6
## 10 Afghanistan AFG 1969 1.41e9 NA NA 15.0 10.1 1.09e7 10.9
## # ... with 48 more rows, and abbreviated variable names 1: Population,
## # 2: Population.in.millions
## # i Use `print(n = ...)` to see more rows
autoplot(global_econ1, Population.in.millions)
#There is a clear decrease in population from 1979-1989 when the Soviet Afghan war took place.
#b
afgh.fit <- global_econ1 %>%
model(lm = TSLM(Population.in.millions ~ Year))
report(afgh.fit)
## Series: Population.in.millions
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7945 -2.5826 0.7448 2.2592 6.0363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -829.29253 49.73087 -16.68 <2e-16 ***
## Year 0.42577 0.02501 17.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.188 on 56 degrees of freedom
## Multiple R-squared: 0.8381, Adjusted R-squared: 0.8352
## F-statistic: 289.9 on 1 and 56 DF, p-value: < 2.22e-16
afgh.fit %>% gg_tsresiduals()
#This looks awful. The innovation residuals have a clear trend and look nothing like white noise. There is autocorrelation all over the place and it does not appear to be normally distributed.
afgh.pfit <- global_econ1 %>%
model(piecewise = TSLM(Population.in.millions ~ trend(knots = c(1980, 1989))))
report(afgh.pfit)
## Series: Population.in.millions
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57759 -0.17420 -0.01678 0.18723 0.67995
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.697573 0.131122 66.33 <2e-16 ***
## trend(knots = c(1980, 1989))trend 0.224372 0.009623 23.32 <2e-16 ***
## trend(knots = c(1980, 1989))trend_21 -0.456803 0.024498 -18.65 <2e-16 ***
## trend(knots = c(1980, 1989))trend_30 1.082782 0.021418 50.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3009 on 54 degrees of freedom
## Multiple R-squared: 0.9986, Adjusted R-squared: 0.9985
## F-statistic: 1.293e+04 on 3 and 54 DF, p-value: < 2.22e-16
afgh.pfit %>% gg_tsresiduals()
#I would say this model close resembles white noise, has less autocorrelation, and appears to be more normally distributed. I would go with the piece wise model for sure.
#c
global_econ1 %>%
model(RW(Population.in.millions ~ drift())) %>%
forecast(h= "5 years") %>%
autoplot(global_econ1) +
geom_line(data = slice(global_econ1, range(cumsum(!is.na(Population.in.millions)))),
aes(y=Population.in.millions), linetype = "dashed", colour = "#0072B2")
#I think the drift fits the data very well.
###Exercise 6
#a
Our model must be linear, correctly specified, and have an additive error term. The model has to be set up correctly in order for the regression to produce a good forecast.
No perfect multicollinearity: Two dependent variables being perfect functions of one another.
The population mean of errors is 0. Otherwise the forecast will be systematically biased.
No heteroskedasticity. We want the variation in our residuals to be constant.
All explanatory variables must be uncorrelated with the error term. There will be bias if anything in your model is correlated with the error term.
The error term must be normally distributed to easily produce prediction intervals.
No serial correlation (autocorrelation). If there is serial correlation the forecasts will be inefficient.
#b
A consistent estimator gets closer and closer as the sample size increases. An unbiased estimator is not affected by an increasing sample size and in order for an estimate to be unbiased its expected value must equal the true parameter value.
#c
Adjusted R squared is adjusted for degrees of freedom. If you increase the sample size ad-nauseam R squared will either stay the same or increase it will never decrease. By using adjusted R squared and adjusting for degrees of freedom you are taking into account the sample size to get a much more accurate account of how much of your variation was truly explained by your model.