+— title: ‘Assignement #1’ author: “Thomas McNamee” date: ‘2022-07-09’ output: html_document —
# Question 4
library(USgas)
us_total <- us_total %>% as_tibble(key = state, index = year)
us_total %>%
filter(state %in% c('Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island')) %>%
ggplot(aes(x = year, y = y, colour = state)) +
geom_line() +
facet_grid(state ~., scales = "free_y") +
labs(title = "Annual Natural Gas Consumption in New England",
y = "Consumption")
# Question 5
tourism <- readxl::read_excel("C:/Users/tmcna/OneDrive/Predictive Analytics & Forecasting/tourism.xlsx")
tourism_ts <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(key = c(Region, State, Purpose),
index = Quarter)
tourism_ts %>%
group_by(Region, Purpose) %>%
mutate(Avg_Trips = mean(Trips)) %>%
ungroup() %>%
filter(Avg_Trips == max(Avg_Trips)) %>%
distinct(Region, Purpose)
## # A tibble: 1 × 2
## Region Purpose
## <chr> <chr>
## 1 Sydney Visiting
tourism %>%
group_by(Quarter, State) %>%
mutate(Quarter = yearquarter(Quarter),
Total_Trips = sum(Trips)) %>%
select(Quarter, State, Total_Trips) %>%
distinct() %>%
as_tsibble(index = Quarter,
key = State)
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## # Groups: State @ Quarter [640]
## Quarter State Total_Trips
## <qtr> <chr> <dbl>
## 1 1998 Q1 ACT 551.
## 2 1998 Q2 ACT 416.
## 3 1998 Q3 ACT 436.
## 4 1998 Q4 ACT 450.
## 5 1999 Q1 ACT 379.
## 6 1999 Q2 ACT 558.
## 7 1999 Q3 ACT 449.
## 8 1999 Q4 ACT 595.
## 9 2000 Q1 ACT 600.
## 10 2000 Q2 ACT 557.
## # … with 630 more rows
# Question 7
gas <- tail(aus_production, 5*4) %>% select(Gas)
gas %>%
autoplot()+
labs(title = "Last Five Years of The Gas Data")+
theme_replace()+
geom_line(col = "#1B89D3")
## Plot variable not specified, automatically selected `.vars = Gas`
gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data")
## Warning: Removed 2 row(s) containing missing values (geom_path).
gas_decom <- gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components()
gas_decom %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Last Five Years of The Gas Data") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
new_gas <- gas
new_gas$Gas[10] <- new_gas$Gas[10]+300
new_gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 row(s) containing missing values (geom_path).
new_gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Last Five Years of The Gas Data with 300 added to the 10th observation") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 row(s) containing missing values (geom_path).
# Question 8
plot(aus_livestock)
aus_livestock1 <- aus_livestock$Count
head_ts <- function(aus_livestock1, n) {
aus_livestock1 <- as.ts(aus_livestock1)
window(aus_livestock1, end=tsp(aus_livestock1)[2]-(n-1)/frequency(aus_livestock1))
}
training <- head(aus_livestock1, 476)
training
## [1] 2300 2100 2100 1900 2100 1800 1800 1900 2700 2300 2500 2900 2400 2400 2200
## [16] 2300 2400 2200 2400 2600 2800 2600 2900 3000 2500 2800 2600 2800 2700 2400
## [31] 2400 2600 2300 1700 2000 2000 1600 1600 2000 2100 2600 2100 3200 2700 2900
## [46] 2200 2800 2100 1500 2000 2600 2500 2400 1300 1700 2200 1700 2300 1900 1800
## [61] 1800 2100 2800 3400 1600 1700 2900 2900 2700 1800 2100 2400 2400 700 0
## [76] 0 900 500 1400 1400 1200 1100 900 1100 1000 1300 1400 1300 1800 1500
## [91] 1800 2000 2100 1600 2300 2000 1900 2200 1500 1900 2000 1300 1800 1400 1600
## [106] 1400 1300 1100 1300 1200 1400 2000 1700 1800 2000 1800 1800 2300 1800 2200
## [121] 2300 1600 2000 1600 1500 2200 1800 1800 1700 2200 1900 1900 2300 1400 2000
## [136] 1900 2300 2300 1800 2300 2600 1900 1800 2200 2300 3100 2400 2300 3100 2300
## [151] 2300 2700 3400 2700 3200 2500 2200 2800 2500 2700 4100 2900 3700 2800 3100
## [166] 2300 3000 2500 2900 2800 2500 2900 3000 2700 2200 2600 2200 2600 3600 2500
## [181] 0 0 2500 3000 2400 2300 3100 2500 2400 3000 2800 2500 3000 2200 2600
## [196] 2000 0 2500 2300 2700 3400 2700 3300 4000 3100 0 0 0 0 0
## [211] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [226] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [241] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [256] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [271] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [286] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [301] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [316] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [331] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [346] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [361] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [376] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [391] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [406] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [421] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [436] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [451] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [466] 0 0 0 0 0 0 0 0 0 0 0
tail_ts <- function(aus_livestock1,n) {
aus_livestock1 <- as.ts(aus_livestock1)
window(aus_livestock1,start=tsp(aus_livestock1)[2]-(n-1)/frequency(aus_livestock1))
}
test <- tail(aus_livestock1,76)
test
## [1] 136500 161700 166300 106100 160200 139800 116300 122200 131900 58100
## [11] 46600 114300 146300 168700 172600 153400 185600 173400 137400 111300
## [21] 102600 50000 41300 96000 134700 185800 150900 129700 124000 123200
## [31] 119000 77800 85600 59100 34900 109700 147000 138300 161400 128200
## [41] 126500 130400 122900 115800 96900 45900 27600 108100 118200 108000
## [51] 100600 89100 92000 71000 60100 44200 77600 44500 69300 156800
## [61] 150100 153900 171300 124900 114200 95800 93900 81200 105600 51200
## [71] 37800 160600 121900 134000 153700 127300
livestockfit1 <- meanf(training, h=76)
livestockfit2 <- naive(training, h=76)
livestockfit3 <- snaive(training, h=76)
livestockfit4 <- rwf(training, h=76, drift=TRUE)
plot(livestockfit1, plot.conf=FALSE, ylab="Number Sold", xlab="Year",
main="Forecasts for two years production")
## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a graphical
## parameter
## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf" is not
## a graphical parameter
## Warning in axis(1, ...): "plot.conf" is not a graphical parameter
## Warning in axis(2, ...): "plot.conf" is not a graphical parameter
## Warning in box(...): "plot.conf" is not a graphical parameter
lines(livestockfit2$mean,col=2)
lines(livestockfit3$mean,col=3)
lines(livestockfit4$mean,col=4)
legend("topleft",lty=1,col=c(4,2,3,5),
legend=c("Mean method","Naive method","Seasonal naive method", "Drift method"))
res <- residuals(snaive(aus_livestock1))
plot(res)
# Question 1
jan14_vic_elec <- vic_elec %>%
filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
index_by(Date = as_date(Time)) %>%
summarise(
Demand = sum(Demand),
Temperature = max(Temperature)
)
jan14_vic_elec %>%
gather("Variable", "Value", Demand, Temperature) %>%
ggplot(aes(x = Date, y = Value)) +
geom_line() +
facet_grid(vars(Variable), scales = "free_y") +
labs(title = "Electricity Demand") +
theme(plot.title = element_text(hjust = 0.5))
jan14_vic_elec %>%
ggplot(aes(x=Temperature, y=Demand)) +
geom_point() +
geom_smooth(method="lm", se=FALSE) +
labs(title = "Electricity Demand") +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'
fit <- jan14_vic_elec %>% model(TSLM(Demand ~ Temperature))
fit %>% gg_tsresiduals()
demand15 <- jan14_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 15)
) %>%
autoplot(jan14_vic_elec)
demand35 <- jan14_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(jan14_vic_elec, 1) %>%
mutate(Temperature = 35)
) %>%
autoplot(jan14_vic_elec)
demand35
fit <- lm(Demand ~ Temperature, data=jan14_vic_elec)
forecast(fit, newdata=data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 151398.35 151398.4 117127.2 185669.5 97951.22 204845.5
## 274484.25 274484.2 241333.0 307635.5 222783.69 326184.8
plot(Demand~Temperature, data=vic_elec, main= "Demand vs. Temp")
# Question 5
global_economy %>%
filter(Country == 'Estonia') -> est_econ
est_econ %>%
filter(Year <= year(as.Date("2015-01-01"))) -> est_train
est_econ %>%
autoplot(Exports) +
labs(y = "Exports", title = "Estonian Exports")
## Warning: Removed 35 row(s) containing missing values (geom_path).
est_train$Exports %>%
ets(model = "ANN") %>%
forecast(h = 2) -> est_ses
## Warning in ets(., model = "ANN"): Missing values encountered. Using longest
## contiguous portion of time series
sqrt(mean(est_ses$residuals^2))
## [1] 5.731807
est_train$Exports %>%
ets(model = "AAN") %>%
forecast(h = 2) -> est_holt
## Warning in ets(., model = "AAN"): Missing values encountered. Using longest
## contiguous portion of time series
sqrt(mean(est_holt$residuals^2))
## [1] 5.800228
est_ses %>% autoplot()
est_holt %>% autoplot()
est_ses
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 57 78.60668 70.88413 86.32922 66.79606 90.41729
## 58 78.60668 67.68590 89.52746 61.90478 95.30857
est_holt
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 57 78.5536 70.29195 86.81524 65.91851 91.18869
## 58 78.5005 66.81680 90.18419 60.63183 96.36917
s1 <- sd(est_ses$residuals)
est_ses$mean[1] - 1.96 * s1
## [1] 67.13968
est_ses$mean[1] + 1.96 * s1
## [1] 90.07368
s2 <- sd(est_holt$residuals)
est_holt$mean[1] - 1.96 * s2
## [1] 66.9291
est_holt$mean[1] + 1.96 * s2
## [1] 90.17809