+— title: ‘Assignement #1’ author: “Thomas McNamee” date: ‘2022-07-09’ output: html_document —

# Question 4
library(USgas)
us_total <- us_total %>% as_tibble(key = state, index = year)
us_total %>%
  filter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island')) %>%
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line() +
  facet_grid(state ~., scales = "free_y") +
  labs(title = "Annual Natural Gas Consumption in New England",
       y = "Consumption")

# Question 5
tourism <- readxl::read_excel("C:/Users/tmcna/OneDrive/Predictive Analytics & Forecasting/tourism.xlsx")

tourism_ts <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(key = c(Region, State, Purpose),
             index = Quarter)

tourism_ts %>%
  group_by(Region, Purpose) %>%
  mutate(Avg_Trips = mean(Trips)) %>%
  ungroup() %>%
  filter(Avg_Trips == max(Avg_Trips)) %>%
  distinct(Region, Purpose)
## # A tibble: 1 × 2
##   Region Purpose 
##   <chr>  <chr>   
## 1 Sydney Visiting
tourism %>%
  group_by(Quarter, State) %>%
  mutate(Quarter = yearquarter(Quarter),
         Total_Trips = sum(Trips)) %>%
  select(Quarter, State, Total_Trips) %>%
  distinct() %>%
  as_tsibble(index = Quarter,
             key = State)
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
## # Groups:    State @ Quarter [640]
##    Quarter State Total_Trips
##      <qtr> <chr>       <dbl>
##  1 1998 Q1 ACT          551.
##  2 1998 Q2 ACT          416.
##  3 1998 Q3 ACT          436.
##  4 1998 Q4 ACT          450.
##  5 1999 Q1 ACT          379.
##  6 1999 Q2 ACT          558.
##  7 1999 Q3 ACT          449.
##  8 1999 Q4 ACT          595.
##  9 2000 Q1 ACT          600.
## 10 2000 Q2 ACT          557.
## # … with 630 more rows
# Question 7
gas <- tail(aus_production, 5*4) %>% select(Gas)

gas %>%
  autoplot()+
  labs(title = "Last Five Years of The Gas Data")+
  theme_replace()+
  geom_line(col = "#1B89D3")
## Plot variable not specified, automatically selected `.vars = Gas`

gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data")
## Warning: Removed 2 row(s) containing missing values (geom_path).

gas_decom <- gas %>%
             model(classical_decomposition(Gas,type = "multiplicative")) %>%
             components()
gas_decom %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Last Five Years of The Gas Data") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

new_gas <- gas
new_gas$Gas[10] <- new_gas$Gas[10]+300

new_gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 row(s) containing missing values (geom_path).

new_gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Last Five Years of The Gas Data with 300 added to the 10th observation") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

# Question 8
plot(aus_livestock)

aus_livestock1 <- aus_livestock$Count

head_ts <- function(aus_livestock1, n) {
  aus_livestock1 <- as.ts(aus_livestock1)
  window(aus_livestock1, end=tsp(aus_livestock1)[2]-(n-1)/frequency(aus_livestock1))
}
training <- head(aus_livestock1, 476)
training
##   [1] 2300 2100 2100 1900 2100 1800 1800 1900 2700 2300 2500 2900 2400 2400 2200
##  [16] 2300 2400 2200 2400 2600 2800 2600 2900 3000 2500 2800 2600 2800 2700 2400
##  [31] 2400 2600 2300 1700 2000 2000 1600 1600 2000 2100 2600 2100 3200 2700 2900
##  [46] 2200 2800 2100 1500 2000 2600 2500 2400 1300 1700 2200 1700 2300 1900 1800
##  [61] 1800 2100 2800 3400 1600 1700 2900 2900 2700 1800 2100 2400 2400  700    0
##  [76]    0  900  500 1400 1400 1200 1100  900 1100 1000 1300 1400 1300 1800 1500
##  [91] 1800 2000 2100 1600 2300 2000 1900 2200 1500 1900 2000 1300 1800 1400 1600
## [106] 1400 1300 1100 1300 1200 1400 2000 1700 1800 2000 1800 1800 2300 1800 2200
## [121] 2300 1600 2000 1600 1500 2200 1800 1800 1700 2200 1900 1900 2300 1400 2000
## [136] 1900 2300 2300 1800 2300 2600 1900 1800 2200 2300 3100 2400 2300 3100 2300
## [151] 2300 2700 3400 2700 3200 2500 2200 2800 2500 2700 4100 2900 3700 2800 3100
## [166] 2300 3000 2500 2900 2800 2500 2900 3000 2700 2200 2600 2200 2600 3600 2500
## [181]    0    0 2500 3000 2400 2300 3100 2500 2400 3000 2800 2500 3000 2200 2600
## [196] 2000    0 2500 2300 2700 3400 2700 3300 4000 3100    0    0    0    0    0
## [211]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [226]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [241]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [256]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [271]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [286]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [301]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [316]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [331]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [346]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [361]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [376]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [391]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [406]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [421]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [436]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [451]    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## [466]    0    0    0    0    0    0    0    0    0    0    0
tail_ts <- function(aus_livestock1,n) {
  aus_livestock1 <- as.ts(aus_livestock1)
  window(aus_livestock1,start=tsp(aus_livestock1)[2]-(n-1)/frequency(aus_livestock1))
}
test <- tail(aus_livestock1,76)
test
##  [1] 136500 161700 166300 106100 160200 139800 116300 122200 131900  58100
## [11]  46600 114300 146300 168700 172600 153400 185600 173400 137400 111300
## [21] 102600  50000  41300  96000 134700 185800 150900 129700 124000 123200
## [31] 119000  77800  85600  59100  34900 109700 147000 138300 161400 128200
## [41] 126500 130400 122900 115800  96900  45900  27600 108100 118200 108000
## [51] 100600  89100  92000  71000  60100  44200  77600  44500  69300 156800
## [61] 150100 153900 171300 124900 114200  95800  93900  81200 105600  51200
## [71]  37800 160600 121900 134000 153700 127300
livestockfit1 <- meanf(training, h=76)
livestockfit2 <- naive(training, h=76)
livestockfit3 <- snaive(training, h=76)
livestockfit4 <- rwf(training, h=76, drift=TRUE)
plot(livestockfit1, plot.conf=FALSE, ylab="Number Sold", xlab="Year",
     main="Forecasts for two years production")
## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a graphical
## parameter
## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf" is not
## a graphical parameter
## Warning in axis(1, ...): "plot.conf" is not a graphical parameter
## Warning in axis(2, ...): "plot.conf" is not a graphical parameter
## Warning in box(...): "plot.conf" is not a graphical parameter
lines(livestockfit2$mean,col=2)
lines(livestockfit3$mean,col=3)
lines(livestockfit4$mean,col=4)
legend("topleft",lty=1,col=c(4,2,3,5),
      legend=c("Mean method","Naive method","Seasonal naive method", "Drift method"))

res <- residuals(snaive(aus_livestock1))
plot(res)

# Question 1
jan14_vic_elec <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )
jan14_vic_elec %>%
  gather("Variable", "Value", Demand, Temperature) %>%
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  facet_grid(vars(Variable), scales = "free_y") +
  labs(title = "Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.5))

jan14_vic_elec %>%
  ggplot(aes(x=Temperature, y=Demand)) +
  geom_point() +
  geom_smooth(method="lm", se=FALSE) +
  labs(title = "Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'

fit <- jan14_vic_elec %>% model(TSLM(Demand ~ Temperature))
fit %>% gg_tsresiduals()

demand15 <- jan14_vic_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan14_vic_elec, 1) %>%
      mutate(Temperature = 15)
  ) %>%
  autoplot(jan14_vic_elec)

demand35 <- jan14_vic_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan14_vic_elec, 1) %>%
      mutate(Temperature = 35)
  ) %>%
  autoplot(jan14_vic_elec)
demand35

fit <- lm(Demand ~ Temperature, data=jan14_vic_elec)

forecast(fit, newdata=data.frame(Temperature=c(15,35)))
##           Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 151398.35       151398.4 117127.2 185669.5  97951.22 204845.5
## 274484.25       274484.2 241333.0 307635.5 222783.69 326184.8
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temp")

# Question 5
global_economy %>% 
    filter(Country == 'Estonia') -> est_econ

est_econ %>%
    filter(Year <= year(as.Date("2015-01-01"))) -> est_train

est_econ %>%
  autoplot(Exports) +
  labs(y = "Exports", title = "Estonian Exports")
## Warning: Removed 35 row(s) containing missing values (geom_path).

est_train$Exports %>%
    ets(model = "ANN") %>% 
    forecast(h = 2) -> est_ses
## Warning in ets(., model = "ANN"): Missing values encountered. Using longest
## contiguous portion of time series
sqrt(mean(est_ses$residuals^2))
## [1] 5.731807
est_train$Exports %>%
    ets(model = "AAN")  %>% 
    forecast(h = 2) -> est_holt
## Warning in ets(., model = "AAN"): Missing values encountered. Using longest
## contiguous portion of time series
sqrt(mean(est_holt$residuals^2))
## [1] 5.800228
est_ses %>% autoplot()

est_holt %>% autoplot()

est_ses
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 57       78.60668 70.88413 86.32922 66.79606 90.41729
## 58       78.60668 67.68590 89.52746 61.90478 95.30857
est_holt
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 57        78.5536 70.29195 86.81524 65.91851 91.18869
## 58        78.5005 66.81680 90.18419 60.63183 96.36917
s1 <- sd(est_ses$residuals)
est_ses$mean[1] - 1.96 * s1
## [1] 67.13968
est_ses$mean[1] + 1.96 * s1
## [1] 90.07368
s2 <- sd(est_holt$residuals)
est_holt$mean[1] - 1.96 * s2
## [1] 66.9291
est_holt$mean[1] + 1.96 * s2
## [1] 90.17809