hw4

Questions

Exercise 1

vic2014 <- vic_elec %>% 
  slice(35089:36576) %>% 
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Max_temperature = max(Temperature))
autoplot(vic2014)+
  labs(title="Demand for electricity in January 2014")

## Plot variable not specified, automatically selected `.vars = Demand`

vic2014 %>%
  ggplot(aes(x = Max_temperature, y = Demand)) +
  labs(y = "Demand",
       x = "Maximum Temp") +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

## `geom_smooth()` using formula 'y ~ x'

#there is a positive relationship because as temp rises, more people want to air condition their homes.
#b
fit <- vic2014 %>%
  model(TSLM(Demand ~ Max_temperature)) %>%
  report()

## Series: Demand 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -49978.2 -10218.9   -121.3  18533.2  35440.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      59083.9    17424.8   3.391  0.00203 ** 
## Max_temperature   6154.3      601.3  10.235 3.89e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24540 on 29 degrees of freedom
## Multiple R-squared: 0.7832,  Adjusted R-squared: 0.7757
## F-statistic: 104.7 on 1 and 29 DF, p-value: 3.8897e-11

fit %>% gg_tsresiduals()

#this is clearly not a normal distribution. I think the problem with this may be our small sample size. There may be a very small trend in our innovation residuals, but I could understand someone disagreeing with me. I dont see any outliers, but the reason why i'm doubtful to say the data has a trend is the jan 26-27 huge decrease, so that may be considered influential. Autocorrelation is really good, within the blue lines. 

#c
hitemp <- vic2014 %>% 
  model(TSLM(Demand~Max_temperature)) %>% 
  forecast(new_data(vic2014, 1) %>% 
  mutate(Max_temperature=35))
autoplot(hitemp)+
  autolayer(vic2014)

## Plot variable not specified, automatically selected `.vars = Demand`

lowtemp <- vic2014 %>% 
  model(TSLM(Demand~Max_temperature)) %>% 
  forecast(new_data(vic2014, 1) %>% 
  mutate(Max_temperature=15))
autoplot(lowtemp)+
  autolayer(vic2014)

## Plot variable not specified, automatically selected `.vars = Demand`

#i think the hitemp is a reasonable forecast because it fits with the trend, however I do not think the low temp is good because it is much lower than all the other observations. 

lowtemp %>% 
  hilo() %>% 
  select("80%")

## # A tsibble: 1 x 2 [1D]
##                    `80%` Date      
##                   <hilo> <date>    
## 1 [117908.1, 184888.6]80 2014-02-01

hitemp %>% 
  hilo() %>% 
  select("80%")

## # A tsibble: 1 x 2 [1D]
##                    `80%` Date      
##                   <hilo> <date>    
## 1 [242088.4, 306880.1]80 2014-02-01

vicag <- vic_elec %>% 
   index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Max_temperature = max(Temperature))
ggplot(data=vicag)+
  geom_point(mapping = aes(x=Max_temperature, y= Demand))

#it looks like as temperature increases to about 25 C, demand decreases, then rises. I think this shows that my model was pretty accurate.

Exercise 2

#a
autoplot(olympic_running) +
  facet_wrap(~Length + Sex, scales = "free")

## Plot variable not specified, automatically selected `.vars = Time`

autoplot(olympic_running) +
  facet_wrap(~Length + Sex, scales = "free") +
  labs(title = "Running", x = "Year", y = "Time") +
  theme(legend.position = "none") +
  geom_smooth(method="lm", se=FALSE)

## Plot variable not specified, automatically selected `.vars = Time`
## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 31 rows containing non-finite values (stat_smooth).

#b
olympic_lm <- lm(Year ~ ., olympic_running)
olympic_lm

## 
## Call:
## lm(formula = Year ~ ., data = olympic_running)
## 
## Coefficients:
## (Intercept)       Length     Sexwomen         Time  
##  1947.93654      0.05872     34.13151     -0.32515

#c
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## 
## Attaching package: 'forecast'
## 
## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

checkresiduals(olympic_lm)

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals
## LM test = 179.57, df = 10, p-value < 2.2e-16

detach(package:forecast, unload = TRUE)
#d
olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "100") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "100") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "200") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "200") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "400") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "400") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "800") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "800") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "1500") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "1500") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "5000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "5000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "men") %>%
  filter(Length == "10000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

olympic_running %>%
  filter(Sex == "women") %>%
  filter(Length == "10000") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(new_data(olympic_running, 1) %>%
             mutate(Year = 2020)) %>%
  autoplot(olympic_running)

Exercise 3

autoplot(souvenirs)

## Plot variable not specified, automatically selected `.vars = Sales`

#it spikes a lot during Christmas and has increased throughout the years. I don't see anything really unusual. 
#b. We take logs to make an exponential model into a linear model. 
#c
dumfest <- souvenirs %>% mutate(ifelse(month(Month)==3,1,0))
fitsou2 <- dumfest %>% model(TSLM(log(Sales) ~ trend() + season()))
report(fitsou2)

## Series: Sales 
## Model: TSLM 
## Transformation: log(Sales) 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.416437 -0.126185  0.006075  0.113889  0.385670 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.6058604  0.0768740  98.939  < 2e-16 ***
## trend()        0.0223930  0.0008448  26.508  < 2e-16 ***
## season()year2  0.2510437  0.0993278   2.527 0.013718 *  
## season()year3  0.6952066  0.0993386   6.998 1.18e-09 ***
## season()year4  0.3829341  0.0993565   3.854 0.000252 ***
## season()year5  0.4079944  0.0993817   4.105 0.000106 ***
## season()year6  0.4469625  0.0994140   4.496 2.63e-05 ***
## season()year7  0.6082156  0.0994534   6.116 4.69e-08 ***
## season()year8  0.5853524  0.0995001   5.883 1.21e-07 ***
## season()year9  0.6663446  0.0995538   6.693 4.27e-09 ***
## season()year10 0.7440336  0.0996148   7.469 1.61e-10 ***
## season()year11 1.2030164  0.0996828  12.068  < 2e-16 ***
## season()year12 1.9581366  0.0997579  19.629  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1858 on 71 degrees of freedom
## Multiple R-squared: 0.9527,  Adjusted R-squared: 0.9447
## F-statistic: 119.1 on 12 and 71 DF, p-value: < 2.22e-16

augment(fitsou2) %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Sales, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted")) +
  labs(y="Megalitres",title ="Australian quarterly beer production") +
  scale_colour_manual(values = c(Data = "black", Fitted = "#D55E00"))

#e
mdl <- augment(fitsou2, souvenirs$Month)

## Warning: The `type` argument of `augment()` is deprecated as of fabletools 0.2.1.
## The type argument is now deprecated for changes to broom v0.7.0.
## Response residuals are now always found in `.resid` and innovation residuals are now found in `.innov`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

mdl$Month <- month(mdl$Month, label=FALSE)
mdlres <- data.frame(inno=mdl$.innov, month=mdl$Month)
boxplot(inno~month, data=mdlres)

#f
#the values of the coeffients tell us how much the mean of the y variable changes in a one unit change of the x variable. 

#g
augment(fitsou2) %>% 
  features(.innov,ljung_box, lag = 24, dof = 0)

## # A tibble: 1 × 3
##   .model                                lb_stat lb_pvalue
##   <chr>                                   <dbl>     <dbl>
## 1 TSLM(log(Sales) ~ trend() + season())    102.  1.21e-11

#this tells us if our residuals are significantly different from zero, which it is because the Pvalue is near zero. 

#h


#i You could take a box cox to improve the seasonality.

Exercise 4

oldgas <- us_gasoline %>% 
  slice(1:726)
autoplot(oldgas)

## Plot variable not specified, automatically selected `.vars = Barrels`

foldgas2 <- oldgas %>% 
  model(TSLM(Barrels~trend()+fourier(K=2)))
report(foldgas2)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.9375162 -0.1897569 -0.0006692  0.2058275  1.0016928 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.094e+00  2.121e-02 334.493  < 2e-16 ***
## trend()              2.802e-03  5.057e-05  55.420  < 2e-16 ***
## fourier(K = 2)C1_52 -1.237e-01  1.497e-02  -8.265 6.71e-16 ***
## fourier(K = 2)S1_52 -2.383e-01  1.497e-02 -15.917  < 2e-16 ***
## fourier(K = 2)C2_52  4.493e-02  1.495e-02   3.006  0.00274 ** 
## fourier(K = 2)S2_52  1.054e-02  1.498e-02   0.704  0.48193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.285 on 720 degrees of freedom
## Multiple R-squared: 0.8271,  Adjusted R-squared: 0.8259
## F-statistic: 688.8 on 5 and 720 DF, p-value: < 2.22e-16

augment(foldgas2) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted"))

foldgas11 <- oldgas %>% 
  model(TSLM(Barrels~trend()+fourier(K=11)))
report(foldgas11)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.896208 -0.177835  0.003786  0.173355  1.014511 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.095e+00  1.994e-02 355.717  < 2e-16 ***
## trend()                2.799e-03  4.756e-05  58.844  < 2e-16 ***
## fourier(K = 11)C1_52  -1.245e-01  1.407e-02  -8.845  < 2e-16 ***
## fourier(K = 11)S1_52  -2.394e-01  1.408e-02 -17.007  < 2e-16 ***
## fourier(K = 11)C2_52   4.529e-02  1.405e-02   3.223  0.00133 ** 
## fourier(K = 11)S2_52   9.256e-03  1.408e-02   0.657  0.51125    
## fourier(K = 11)C3_52   9.632e-02  1.408e-02   6.842 1.70e-11 ***
## fourier(K = 11)S3_52   3.762e-05  1.405e-02   0.003  0.99787    
## fourier(K = 11)C4_52   2.899e-02  1.408e-02   2.060  0.03980 *  
## fourier(K = 11)S4_52   2.884e-02  1.406e-02   2.052  0.04054 *  
## fourier(K = 11)C5_52  -3.354e-02  1.405e-02  -2.387  0.01725 *  
## fourier(K = 11)S5_52   3.172e-02  1.408e-02   2.253  0.02458 *  
## fourier(K = 11)C6_52  -6.569e-02  1.406e-02  -4.671 3.59e-06 ***
## fourier(K = 11)S6_52   2.808e-02  1.407e-02   1.996  0.04628 *  
## fourier(K = 11)C7_52  -2.235e-02  1.408e-02  -1.588  0.11283    
## fourier(K = 11)S7_52   3.274e-02  1.405e-02   2.330  0.02010 *  
## fourier(K = 11)C8_52  -1.664e-02  1.406e-02  -1.183  0.23704    
## fourier(K = 11)S8_52  -1.428e-03  1.407e-02  -0.102  0.91916    
## fourier(K = 11)C9_52  -1.763e-02  1.406e-02  -1.254  0.21019    
## fourier(K = 11)S9_52  -5.731e-04  1.407e-02  -0.041  0.96753    
## fourier(K = 11)C10_52  1.272e-02  1.407e-02   0.904  0.36621    
## fourier(K = 11)S10_52 -2.359e-02  1.406e-02  -1.678  0.09375 .  
## fourier(K = 11)C11_52 -2.837e-02  1.407e-02  -2.016  0.04414 *  
## fourier(K = 11)S11_52  2.555e-02  1.406e-02   1.817  0.06962 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.268 on 702 degrees of freedom
## Multiple R-squared: 0.851,   Adjusted R-squared: 0.8461
## F-statistic: 174.3 on 23 and 702 DF, p-value: < 2.22e-16

augment(foldgas11) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted"))

foldgas26 <- oldgas %>% 
  model(TSLM(Barrels~trend()+fourier(K=26)))
report(foldgas26)

## Series: Barrels 
## Model: TSLM 
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.834099 -0.172836  0.001292  0.167365  1.034800 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.095e+00  1.994e-02 355.756  < 2e-16 ***
## trend()                2.799e-03  4.755e-05  58.858  < 2e-16 ***
## fourier(K = 26)C1_52  -1.244e-01  1.407e-02  -8.843  < 2e-16 ***
## fourier(K = 26)S1_52  -2.394e-01  1.408e-02 -17.004  < 2e-16 ***
## fourier(K = 26)C2_52   4.527e-02  1.405e-02   3.222  0.00134 ** 
## fourier(K = 26)S2_52   9.340e-03  1.408e-02   0.663  0.50740    
## fourier(K = 26)C3_52   9.624e-02  1.408e-02   6.837 1.81e-11 ***
## fourier(K = 26)S3_52   7.369e-05  1.405e-02   0.005  0.99582    
## fourier(K = 26)C4_52   2.892e-02  1.407e-02   2.055  0.04031 *  
## fourier(K = 26)S4_52   2.880e-02  1.405e-02   2.050  0.04079 *  
## fourier(K = 26)C5_52  -3.356e-02  1.405e-02  -2.389  0.01719 *  
## fourier(K = 26)S5_52   3.164e-02  1.408e-02   2.247  0.02494 *  
## fourier(K = 26)C6_52  -6.564e-02  1.406e-02  -4.668 3.67e-06 ***
## fourier(K = 26)S6_52   2.802e-02  1.407e-02   1.992  0.04676 *  
## fourier(K = 26)C7_52  -2.227e-02  1.408e-02  -1.582  0.11409    
## fourier(K = 26)S7_52   3.274e-02  1.405e-02   2.330  0.02009 *  
## fourier(K = 26)C8_52  -1.659e-02  1.406e-02  -1.180  0.23839    
## fourier(K = 26)S8_52  -1.368e-03  1.407e-02  -0.097  0.92254    
## fourier(K = 26)C9_52  -1.765e-02  1.406e-02  -1.255  0.20977    
## fourier(K = 26)S9_52  -4.998e-04  1.407e-02  -0.036  0.97168    
## fourier(K = 26)C10_52  1.266e-02  1.407e-02   0.900  0.36859    
## fourier(K = 26)S10_52 -2.356e-02  1.406e-02  -1.676  0.09419 .  
## fourier(K = 26)C11_52 -2.843e-02  1.407e-02  -2.021  0.04366 *  
## fourier(K = 26)S11_52  2.553e-02  1.406e-02   1.815  0.06991 .  
## fourier(K = 26)C12_52 -1.181e-02  1.406e-02  -0.840  0.40118    
## fourier(K = 26)S12_52 -2.505e-02  1.407e-02  -1.781  0.07539 .  
## fourier(K = 26)C13_52  6.166e-03  1.406e-02   0.438  0.66119    
## fourier(K = 26)S13_52  1.409e-02  1.406e-02   1.002  0.31687    
## fourier(K = 26)C14_52 -3.056e-03  1.406e-02  -0.217  0.82802    
## fourier(K = 26)S14_52  1.800e-02  1.407e-02   1.279  0.20120    
## fourier(K = 26)C15_52 -1.247e-03  1.407e-02  -0.089  0.92937    
## fourier(K = 26)S15_52  1.735e-02  1.406e-02   1.234  0.21766    
## fourier(K = 26)C16_52 -1.207e-02  1.407e-02  -0.858  0.39117    
## fourier(K = 26)S16_52 -1.492e-03  1.406e-02  -0.106  0.91550    
## fourier(K = 26)C17_52  7.686e-03  1.406e-02   0.547  0.58466    
## fourier(K = 26)S17_52 -2.178e-02  1.407e-02  -1.548  0.12210    
## fourier(K = 26)C18_52  2.236e-03  1.406e-02   0.159  0.87369    
## fourier(K = 26)S18_52  1.775e-02  1.407e-02   1.262  0.20739    
## fourier(K = 26)C19_52 -2.301e-03  1.408e-02  -0.163  0.87021    
## fourier(K = 26)S19_52 -6.677e-05  1.405e-02  -0.005  0.99621    
## fourier(K = 26)C20_52 -2.977e-03  1.406e-02  -0.212  0.83240    
## fourier(K = 26)S20_52  6.098e-04  1.407e-02   0.043  0.96543    
## fourier(K = 26)C21_52  6.089e-03  1.405e-02   0.433  0.66483    
## fourier(K = 26)S21_52  3.927e-03  1.408e-02   0.279  0.78039    
## fourier(K = 26)C22_52 -8.038e-04  1.407e-02  -0.057  0.95447    
## fourier(K = 26)S22_52 -1.126e-02  1.405e-02  -0.801  0.42323    
## fourier(K = 26)C23_52 -2.159e-02  1.408e-02  -1.534  0.12551    
## fourier(K = 26)S23_52  1.192e-02  1.405e-02   0.848  0.39656    
## fourier(K = 26)C24_52  7.003e-04  1.405e-02   0.050  0.96025    
## fourier(K = 26)S24_52  1.837e-02  1.408e-02   1.305  0.19241    
## fourier(K = 26)C25_52  4.907e-03  1.406e-02   0.349  0.72717    
## fourier(K = 26)S25_52  7.445e-03  1.407e-02   0.529  0.59686    
## fourier(K = 26)C26_52 -3.084e-02  9.944e-03  -3.101  0.00201 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2679 on 673 degrees of freedom
## Multiple R-squared: 0.8572,  Adjusted R-squared: 0.8461
## F-statistic: 77.67 on 52 and 673 DF, p-value: < 2.22e-16

augment(foldgas26) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted"))

fit <- oldgas %>%
  model(K1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
        K2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
        K3 = TSLM(Barrels ~ trend() + fourier(K = 3)),
        K4 = TSLM(Barrels ~ trend() + fourier(K = 4)),
        K5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
        K6 = TSLM(Barrels ~ trend() + fourier(K = 6)),
        K7 = TSLM(Barrels ~trend()+ fourier(K=7)),
        K8 = TSLM(Barrels ~trend()+ fourier(K=8)),
        K9 = TSLM(Barrels ~trend()+ fourier(K=9)),
        K10 = TSLM(Barrels ~trend()+ fourier(K=10)),
        K11 = TSLM(Barrels ~trend()+ fourier(K=11)),
        K20=TSLM(Barrels ~trend()+ fourier(K=20)),
        K26=TSLM(Barrels ~trend()+ fourier(K=26)))

glance(fit) %>% select(.model, r_squared, adj_r_squared, AICc)

## # A tibble: 13 × 4
##    .model r_squared adj_r_squared   AICc
##    <chr>      <dbl>         <dbl>  <dbl>
##  1 K1         0.825         0.824 -1809.
##  2 K2         0.827         0.826 -1814.
##  3 K3         0.837         0.835 -1853.
##  4 K4         0.839         0.837 -1856.
##  5 K5         0.841         0.838 -1862.
##  6 K6         0.846         0.844 -1884.
##  7 K7         0.848         0.845 -1887.
##  8 K8         0.848         0.845 -1885.
##  9 K9         0.849         0.845 -1882.
## 10 K10        0.849         0.845 -1881.
## 11 K11        0.851         0.846 -1885.
## 12 K20        0.854         0.845 -1859.
## 13 K26        0.857         0.846 -1851.

#clearly k= 11 is the best choice because it has the lowest AICC.

#c
foldgas11 %>% 
  gg_tsresiduals()

augment(foldgas11) %>% 
  features(.innov, ljung_box, lag = 104, dof = 0)

## # A tibble: 1 × 3
##   .model                                    lb_stat lb_pvalue
##   <chr>                                       <dbl>     <dbl>
## 1 TSLM(Barrels ~ trend() + fourier(K = 11))    149.   0.00268

#d
augfoldgas11 <- augment(foldgas11)
autoplot(augfoldgas11) +
  autolayer(us_gasoline)

## Plot variable not specified, automatically selected `.vars = Barrels`
## Plot variable not specified, automatically selected `.vars = Barrels`

newgas <- us_gasoline %>% slice(727:1355)

  autoplot(augfoldgas11)

## Plot variable not specified, automatically selected `.vars = Barrels`

Exercise 5

afgan <- global_economy %>% 
  filter(Country=="Afghanistan") %>% 
  select(Year, Population)
autoplot(afgan)

## Plot variable not specified, automatically selected `.vars = Population`

#in the 1980's there is a clear dip because of the Soviet Afghan war. 
#b
aftrends <- afgan %>% 
  model(
    Linear=TSLM(Population~trend()),
    piecewise=TSLM(Population~trend(knots=c(1980,1989)))
  )
afgan %>%
  autoplot(Population) +
  geom_line(data = fitted(aftrends),
            aes(y = .fitted, colour = .model))

#clearly the piecewise function does much better. 
afganfc <- aftrends %>% 
  forecast(h=5)
afgan %>%
  autoplot(Population) +
  geom_line(data = fitted(aftrends),
            aes(x=Year, y = .fitted, colour = .model))+
  autolayer(afganfc)

#yeah the piecewise is much better. keeps with the true data much better

Exercise 6

assume model is reasonable approximation to reality, and that it is linear. 2. error term has mean of zero. 3. error terms arent autocorrelated. 4. error terms are unrelated to predictor variables. 5. erroe terms are normally distributed.6. predictors arent random variables. 6. no multicolinearity. 7. homoscedastic. Unbiased estimator means that your estimation of B is equal to the actual value of B. A consistant estimator means that as the number of observations apporaches infinity, the estimator approaches the true value. R^2 tells you how much of the variation in your y variable is explained by your model. Adjusted R^2 takes degrees of freedom into account when doing this calculation.

hw4

Matt Mullis

10-8-22

Load packages and data

Questions

Exercise 1

Exercise 2

Exercise 3

Exercise 4

Exercise 5

Exercise 6