library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()
library(dplyr)
library(seasonal)
## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:tsibble':
## 
##     index
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tidyr)
library(tibble)
library(ggfortify)
library(moments)
library(fable)
library(brew)
library(sos)
## 
## Attaching package: 'sos'
## 
## The following object is masked from 'package:tidyr':
## 
##     matches
## 
## The following object is masked from 'package:dplyr':
## 
##     matches
## 
## The following object is masked from 'package:utils':
## 
##     ?
library(slider)
library(x13binary)
library(USgas)

Question 1

  1. The relationship is positive because as temperature increases and more electricity is needed to cool houses and buildings.
    See if you can make it print the regression equation
#Extracting Jan 2014 and formatting
jan_elec <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )
#plotting demand and temp
jan_elec %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y= Value)) +
  geom_line() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Demand and Temperature for January 2014")

#Scatter Plot
jan_elec %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  labs(title = "Electricity Demand") 

#Scatter with regression
jan_elec %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  geom_smooth(method="lm", se=TRUE) +
  labs(title = "Electricity Demand (with regression)")
## `geom_smooth()` using formula 'y ~ x'

  1. The model is not adequate. The residuals have an upward trend which means they are not white noise. The distribution is not normal. The distribution is skewed right but has fat tails. The increase in the left tail indicates an outlier. For these reasons, the model will have a low predictive accuracy.
resid_ <- jan_elec %>% model(TSLM(Demand ~ Temperature))
resid_ %>% gg_tsresiduals()

  1. I do not believe these forecast due to the issues in the residuals discussed in part b. I think the model would benefit by adding a dummy variable for whether or not the day is a holiday and if children are in school or not.
degree_15 <- jan_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan_elec, 1) %>%
      mutate(Temperature = 15)
  ) %>%
  autoplot(jan_elec)
degree_15

degree35 <- jan_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan_elec, 1) %>%
      mutate(Temperature = 35)
  ) %>%
  autoplot(jan_elec)
degree35

  1. The point forecast when temperature is 15 degrees is 151398 with an 80% PI of [117908.1, 184888.6]. The point forecast when temperature is 35 degrees is 274484 with an 80% PI of [242088.4, 306880.1].
fc15 <- jan_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(new_data(jan_elec, 1) %>%
  mutate(Temperature = 15)) %>%
  hilo()
fc15
## # A tsibble: 1 x 7 [1D]
## # Key:       .model [1]
##   .model     Date                   Demand  .mean Tempe…¹                  `80%`
##   <chr>      <date>                 <dist>  <dbl>   <dbl>                 <hilo>
## 1 TSLM(Dema… 2014-02-01 N(151398, 6.8e+08) 1.51e5      15 [117908.1, 184888.6]80
## # … with 1 more variable: `95%` <hilo>, and abbreviated variable name
## #   ¹​Temperature
fc35 <- jan_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(new_data(jan_elec, 1) %>%
  mutate(Temperature = 35)) %>%
  hilo()
fc35
## # A tsibble: 1 x 7 [1D]
## # Key:       .model [1]
##   .model     Date                   Demand  .mean Tempe…¹                  `80%`
##   <chr>      <date>                 <dist>  <dbl>   <dbl>                 <hilo>
## 1 TSLM(Dema… 2014-02-01 N(274484, 6.4e+08) 2.74e5      35 [242088.4, 306880.1]80
## # … with 1 more variable: `95%` <hilo>, and abbreviated variable name
## #   ¹​Temperature
  1. The plot somewhat resembles a check mark. This makes since because the average room tempurature is around 20 degrees celsius. As the tempurature goes above or below this, electricity is need to condition the room. If I were to model this, a simple regression would not be the best fit for the data.
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Tempurature")

Question 2

  1. There’s an overall decreasing trend in every graph with the exception of the women’s 1500.
autoplot(olympic_running)
## Plot variable not specified, automatically selected `.vars = Time`

autoplot(olympic_running) +
  facet_wrap(~Length + Sex, scales = "free") 
## Plot variable not specified, automatically selected `.vars = Time`

b. The average rate per year is -0.3905

olympic_reg <- lm(Time ~ ., olympic_running)
olympic_reg
## 
## Call:
## lm(formula = Time ~ ., data = olympic_running)
## 
## Coefficients:
## (Intercept)         Year       Length     Sexwomen  
##    734.9863      -0.3905       0.1766      32.9732
  1. The residuals do not indicate suitability of the fitted line. The are several points of correlation shown in the ACF graph. The distribution is not normal with fat tails indication several outliers. The residual plot is not random. These indicate the model isn’t the best for predicting future values.

unloadNamespace("forecast")

olympic_running %>%
  filter(Sex=="men") %>%
  filter(Length=="100") %>%
  model(TSLM(Time ~ Year)) %>%
  forecast(
    new_data(olympic_running, 1) %>%
      mutate(Year = 2020)
  ) %>%
  autoplot(olympic_running) + 
  labs(title = "Forecasted Time")

Question 3

  1. There is an upward trend in the data with a seasonal component that increases in magnitude.
autoplot(souvenirs)
## Plot variable not specified, automatically selected `.vars = Sales`

b. It is necessary to take the log so that the variance of the seasonal changes are constant though time.

souv_sales <- as.numeric(souvenirs$Sales)
hist(souv_sales)

souvenir_log_sales <- log(souvenirs$Sales)
hist(souvenir_log_sales)

c.

  1. The

Question 4

us_gas <- us_gasoline %>%
  filter(year(Week) < "2005")

fourier_gas1 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 1)))

augment(fourier_gas1) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression (K = 1)") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas5 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 5)))

augment(fourier_gas5) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression (K = 5)") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

fourier_gas10 <- us_gas %>%
  model(TSLM(Barrels ~ trend() + fourier(K = 10)))

augment(fourier_gas10) %>%
  ggplot(aes(x = Week)) +
  geom_line(aes(y = Barrels, color = "Data")) +
  geom_line(aes(y = .fitted, color = "Fitted")) +
  labs(y = "Thousand of Barrels Per Day", title = "US Gas Harmonic Regression (K = 10)") +
  scale_color_manual(values = c(Data = "black", Fitted = "red"))

b.

gas_fit <- us_gas %>%
  model(K1 = TSLM(Barrels ~ trend() + fourier(K = 1)),
        K2 = TSLM(Barrels ~ trend() + fourier(K = 2)),
        K5 = TSLM(Barrels ~ trend() + fourier(K = 5)),
        K10 = TSLM(Barrels ~ trend() + fourier(K = 10)),
        K20 = TSLM(Barrels ~ trend() + fourier(K = 20)),
        K26 = TSLM(Barrels ~ trend() + fourier(K = 26)))
glance(gas_fit) %>% select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 6 × 4
##   .model r_squared adj_r_squared   AICc
##   <chr>      <dbl>         <dbl>  <dbl>
## 1 K1         0.825         0.824 -1809.
## 2 K2         0.827         0.826 -1814.
## 3 K5         0.841         0.838 -1862.
## 4 K10        0.849         0.845 -1881.
## 5 K20        0.854         0.845 -1859.
## 6 K26        0.857         0.846 -1851.
gas_fit <- us_gas %>%
  model(K11 = TSLM(Barrels ~ trend() + fourier(K = 11)),
        K12 = TSLM(Barrels ~ trend() + fourier(K = 12)),
        K13 = TSLM(Barrels ~ trend() + fourier(K = 13)),
        K14 = TSLM(Barrels ~ trend() + fourier(K = 14)),
        K15 = TSLM(Barrels ~ trend() + fourier(K = 15)),
        K16 = TSLM(Barrels ~ trend() + fourier(K = 16)),
        K17 = TSLM(Barrels ~ trend() + fourier(K = 17)),
        K18 = TSLM(Barrels ~ trend() + fourier(K = 18)),
        K19 = TSLM(Barrels ~ trend() + fourier(K = 19)))
glance(gas_fit) %>% select(.model, r_squared, adj_r_squared, AICc)
## # A tibble: 9 × 4
##   .model r_squared adj_r_squared   AICc
##   <chr>      <dbl>         <dbl>  <dbl>
## 1 K11        0.851         0.846 -1885.
## 2 K12        0.852         0.847 -1884.
## 3 K13        0.852         0.846 -1881.
## 4 K14        0.852         0.846 -1879.
## 5 K15        0.853         0.846 -1876.
## 6 K16        0.853         0.846 -1872.
## 7 K17        0.853         0.846 -1871.
## 8 K18        0.854         0.846 -1868.
## 9 K19        0.854         0.846 -1864.

Question 5

  1. The decrease in the graph just before 1990 is the Soviet-Afghan war.
afghanistan <- global_economy %>%
  filter(Country == "Afghanistan") %>%
  mutate(Population_mil = Population/1000)

autoplot(afghanistan, Population_mil) +
  labs(title = "Afghanistan Population", y = "Population (In Thousands)", x = "Year")

b.

ols_afghan <- afghanistan %>% model(TSLM(Population_mil ~ Year))

ols_afghan %>%
  gg_tsresiduals()

piecewise_afghan <- afghanistan %>%
  model(piecewise = TSLM(Population_mil ~ trend(knots = c(1980,1989))))

Question 6