# Load the fpp3 meta-package (attaches tsibble, fable, feasts, dplyr, lubridate, ggplot2)
library(fpp3)
aus_pop = global_economy%>%
filter(Country == 'Australia' & Year >= 2000)%>%
select(Population)
aus_pop%>%
model(RW(Population ~ drift()))%>%
forecast()%>%
autoplot(aus_pop)+
labs(title = 'Australian Population Forecast')
aus_brick = aus_production%>%
na.omit()%>%
select(Bricks)
aus_brick%>%
model(NAIVE(Bricks))%>%
forecast()%>%
autoplot(aus_brick)+
labs(title = 'Australian Brick Production Forecast')
nsw_lambs = aus_livestock%>%
filter(Animal == 'Lambs' & State == 'New South Wales')%>%
select(Count)
nsw_lambs%>%
model(SNAIVE(Count))%>%
forecast()%>%
autoplot(nsw_lambs)+
labs(title = 'Lamb Slaughter Count in New South Wales')
usa_wealth = hh_budget%>%
filter(Country == 'USA')%>%
select(Wealth)
usa_wealth%>%
model(NAIVE(Wealth))%>%
forecast()%>%
autoplot(usa_wealth)+
labs(title = 'Forecast of Wealth in the US')
aus_turnover = aus_retail%>%
filter(Industry == 'Takeaway food services' & State == 'South Australia' )%>%
select(Turnover)
aus_turnover%>%
model(SNAIVE(Turnover))%>%
forecast()%>%
autoplot(aus_turnover)+
labs(title = 'South Australian Takeaway Food Turnover')
fb = gafa_stock%>%
filter(Symbol == 'FB')%>%
# make the irregular trading-day index regular; non-trading days become explicit gaps filled with NA
update_tsibble(regular=TRUE)%>%
fill_gaps()%>%
select(Open)
# plot using stock open price
fb%>%
ggplot(aes(x = Date, y = Open))+
geom_line()+
labs(title = 'Facebook Stock Open Price')
fb_forecast = fb%>%
model(RW(Open~drift()))%>%
forecast(h=50)
fb_forecast
## # A fable: 50 x 4 [1D]
## # Key: .model [1]
## .model Date Open .mean
## <chr> <date> <dist> <dbl>
## 1 RW(Open ~ drift()) 2019-01-01 N(135, 6.5) 135.
## 2 RW(Open ~ drift()) 2019-01-02 N(135, 13) 135.
## 3 RW(Open ~ drift()) 2019-01-03 N(135, 20) 135.
## 4 RW(Open ~ drift()) 2019-01-04 N(135, 26) 135.
## 5 RW(Open ~ drift()) 2019-01-05 N(135, 33) 135.
## 6 RW(Open ~ drift()) 2019-01-06 N(135, 39) 135.
## 7 RW(Open ~ drift()) 2019-01-07 N(135, 46) 135.
## 8 RW(Open ~ drift()) 2019-01-08 N(135, 53) 135.
## 9 RW(Open ~ drift()) 2019-01-09 N(135, 59) 135.
## 10 RW(Open ~ drift()) 2019-01-10 N(135, 66) 135.
## # ... with 40 more rows
fb_forecast%>%
autoplot(fb)+
labs(title = 'Facebook Stock Price Forecast')
# The drift forecast follows the line joining the first and last observations
end = nrow(fb)
# first point
x_start = fb$Date[1]
y_start = fb$Open[1]
# last point
x_end = fb$Date[end]
y_end = fb$Open[end]
fb%>%
model(RW(Open~drift()))%>%
forecast(h=50)%>%
autoplot(fb, level=NULL)+
geom_segment(aes(x = x_start,
y = y_start,
xend = x_end,
yend = y_end))+
labs(title = 'Facebook Stock Price Forecast')
I think the drift method is the best of the benchmark methods for forecasting the FB stock price, because it is able to capture the underlying upward trend.
fb%>%
model(NAIVE(Open))%>%
forecast(h=50)%>%
autoplot(fb)
fb%>%
model(SNAIVE(Open~lag()))%>%
forecast(h=50)%>%
autoplot(fb)
## Warning: Removed 2 row(s) containing missing values (geom_path).
fb%>%
model(MEAN(Open))%>%
forecast(h=50)%>%
autoplot(fb)
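To put the comparison in a single picture, the benchmark methods can also be fitted together in one model() call. This is only a sketch; the model labels (Drift, Naive, Mean) are arbitrary names chosen here, not part of the original code.
# Fit the benchmark methods side by side and overlay their point forecasts
fb %>%
  model(Drift = RW(Open ~ drift()),
        Naive = NAIVE(Open),
        Mean = MEAN(Open)) %>%
  forecast(h = 50) %>%
  autoplot(fb, level = NULL) +
  labs(title = 'Benchmark forecasts of the Facebook open price')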
From the histogram, we see that the distribution of the residuals is approximately normal with a mean near 0, and the residuals do not appear to be correlated. This suggests the SNAIVE model is a good fit, since the deviations from the model are not correlated.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look at some forecasts
fit %>% forecast() %>% autoplot(recent_production)
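As a numeric check on the claim above that the residuals are uncorrelated, a Ljung-Box test can be applied to the innovation residuals. This is a sketch; lag = 8 (twice the seasonal period for quarterly data) is a choice made here.
# Ljung-Box test on the SNAIVE innovation residuals
fit %>%
  augment() %>%
  features(.innov, ljung_box, lag = 8)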
As there seems to be no seasonal trend, a NAIVE forecast should work better than SNAIVE for this dataset. Using the NAIVE method, we see that the residuals are normal and uncorrelated, which signals a good fit.
aus_export = global_economy%>%
filter(Country == 'Australia')%>%
select(Exports)
aus_export%>%
autoplot(Exports)
export_model = aus_export%>%
model(NAIVE(Exports))
export_model%>%
gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
export_model%>%
forecast()%>%
autoplot(aus_export)
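The same kind of Ljung-Box check can back up the "not correlated" assessment for the exports residuals. A sketch; lag = 10 is a common choice for annual data, not something specified in the original.
# Ljung-Box test on the NAIVE residuals of the annual exports series
export_model %>%
  augment() %>%
  features(.innov, ljung_box, lag = 10)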
Looking at the residuals, the NAIVE model seems to be a better fit for the Australian brick production than the SNAIVE model, despite the seasonal changes visible in the original plot.
aus_brick%>%
autoplot(Bricks)
brick_model = aus_brick%>%
model(NAIVE(Bricks))
brick_model%>%
gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
brick_model%>%
forecast()%>%
autoplot(aus_brick)
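Since the commentary above compares NAIVE against SNAIVE for the bricks series, the two methods can also be fitted together for a direct visual comparison. A sketch; the model labels and the two-year horizon are choices made here.
# Overlay NAIVE and SNAIVE point forecasts for the bricks series
aus_brick %>%
  model(Naive = NAIVE(Bricks),
        SNaive = SNAIVE(Bricks)) %>%
  forecast(h = '2 years') %>%
  autoplot(aus_brick, level = NULL) +
  labs(title = 'NAIVE vs SNAIVE forecasts of brick production')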
set.seed(8009)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train = myseries %>%
filter(year(Month) < 2011)
head(myseries_train)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Clothing, footwear and person~ A3349825J 1982 Apr 28.8
## 2 Western Australia Clothing, footwear and person~ A3349825J 1982 May 32.1
## 3 Western Australia Clothing, footwear and person~ A3349825J 1982 Jun 28.5
## 4 Western Australia Clothing, footwear and person~ A3349825J 1982 Jul 29
## 5 Western Australia Clothing, footwear and person~ A3349825J 1982 Aug 25.3
## 6 Western Australia Clothing, footwear and person~ A3349825J 1982 Sep 26.9
The red layer shows the training set, confirming that the pre-2011 filter worked.
autoplot(myseries, Turnover)+
autolayer(myseries_train, Turnover, colour = "red")
turnover_model = myseries_train %>%
model(SNAIVE(Turnover))
The residuals are approximately normal with a mean near 0. The variance of the residuals seems to have grown larger over time.
turnover_model%>%
gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
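To complement the residual plots with a test, a Ljung-Box statistic can be computed on the training residuals. A sketch; lag = 24 (twice the seasonal period for monthly data) is an assumption made here.
# Ljung-Box test on the SNAIVE residuals of the retail training set
turnover_model %>%
  augment() %>%
  features(.innov, ljung_box, lag = 24)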
The model is able to capture the future trend somewhat accurately.
turnover_forecast = turnover_model%>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
turnover_forecast%>%
autoplot(myseries)
turnover_model %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Western~ Clothin~ SNAIV~ Trai~ 4.57 11.9 8.93 5.29 9.64 1 1 0.771
turnover_forecast %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T~ West~ Clothin~ Test 4.31 11.8 9.25 2.00 5.55 1.04 0.992 0.392
By using less data to train the model, the forecast becomes less accurate on every accuracy metric.
myseries_train2 = myseries %>%
filter(year(Month) < 2000)
turnover_forecast2 = myseries_train2%>%
model(SNAIVE(Turnover))%>%
forecast(new_data = anti_join(myseries, myseries_train2))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
turnover_forecast2%>%
accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T~ West~ Clothin~ Test 60.8 66.9 60.8 39.0 39.0 9.84 8.86 0.710
turnover_forecast2%>%
autoplot(myseries)
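To see the effect of the shorter training window in a single table, the two test-set accuracy results can be stacked. A sketch; the train_end column is a label added here and is not part of the accuracy() output.
# Stack the two test-set accuracy tables for a direct comparison
bind_rows(turnover_forecast %>% accuracy(myseries) %>% mutate(train_end = '2010 Dec'),
          turnover_forecast2 %>% accuracy(myseries) %>% mutate(train_end = '1999 Dec')) %>%
  select(train_end, RMSE, MAE, MAPE, MASE)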
I attempted to apply a Box-Cox transformation to the dataset from HW 2 3.3 by splitting the time series into two segments (increasing and decreasing). The results are much better than applying a single transformation to the entire series.
# Return the Guerrero-optimal Box-Cox lambda for a given column of a tsibble
get_lambda = function(data, feature){
data%>%
features(!!sym(feature), features = guerrero)%>%
pull(lambda_guerrero)
}
under_1980 = canadian_gas%>%
filter_index('1960 Jan' ~ '1980 Jan')
under_1980%>%
autoplot(box_cox(x = Volume,
lambda = get_lambda(under_1980,'Volume')))
over_1980 = canadian_gas%>%
filter_index('1980 Jan' ~ '2005 Feb')
over_1980%>%
autoplot(box_cox(x = Volume,
lambda = get_lambda(over_1980,'Volume')))
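For contrast with the split approach, the same transformation can be applied with a single lambda over the whole canadian_gas series, reusing the get_lambda helper defined above. A sketch for comparison only.
# Single Box-Cox transformation over the full series, for comparison with the split version
canadian_gas %>%
  autoplot(box_cox(x = Volume,
                   lambda = get_lambda(canadian_gas, 'Volume'))) +
  labs(title = 'Box-Cox transformed Canadian gas production (single lambda)')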