# Required Libraries
library(fpp3)
library(rstudioapi)
# Read in the data
temps = read.csv(file.path(dirname(documentPath()), "global_climate_data.csv"))
# Filter down to Oregon
temps_or = temps %>%
filter(State == "Oregon") %>%
select(-c(Country, AverageTemperatureUncertainty, State))
# Convert to tsibble
temps_or = temps_or %>%
mutate(Month = yearmonth(dt)) %>%
select(-dt) %>%
as_tsibble(index = Month)
There is a noticeable gap prior to 1850 so I filter all data before that period
# Filter to after 1850 to remove the NAs
temps_or = temps_or %>%
filter(year(Month) >= 1850)
# Confirm all NAs are gone
sum(is.na(temps_or$AverageTemperature))
## [1] 0
We can see there are no more missing values in the data. No we will build our training and test sets to begin building and evaluating models.
# Split Training and Test Sets (80/20 Split)
total_obs = dim(temps_or)[1]
train_obs = total_obs * 0.8
test_obs = total_obs - train_obs
train_or = head(temps_or, train_obs)
test_or = tail(temps_or, test_obs)
I let the ETS() function automatically pick the model based on AIC and it chose (A,A,A).
ets_auto = train_or %>%
model(ETS(AverageTemperature))
report(ets_auto)
## Series: AverageTemperature
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.05765157
## beta = 0.0001000001
## gamma = 0.0001007599
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 8.07355 0.0002648677 -8.290387 -4.717011 0.3875213 5.577286 9.490004 10.18304
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 6.414181 2.801691 -1.062102 -4.382604 -7.086436 -9.315185
##
## sigma^2: 2.2255
##
## AIC AICc BIC
## 12845.58 12845.97 12936.70
components(ets_auto) %>%
autoplot()
# Forecast
ets_auto_forecast = ets_auto %>%
forecast(h = test_obs)
accuracy(object = ets_auto_forecast, data = temps_or)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(AverageTemperature) Test 0.0803 1.50 1.19 -7.38 75.9 0.746 0.725 0.199
ets_auto_forecast %>%
autoplot(test_or)
For the next two models I wanted to see if there was significant importance weighted towards either trend or season. For this model I changed season() to “none.”
ets_aan = train_or %>%
model(ETS(AverageTemperature ~ error("A") + trend("A") + season("N")))
report(ets_aan)
## Series: AverageTemperature
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.006530937
##
## Initial states:
## l[0] b[0]
## -0.6570252 1.443454
##
## sigma^2: 16.0514
##
## AIC AICc BIC
## 15939.63 15939.66 15966.43
components(ets_aan) %>%
autoplot()
# Forecast
ets_aan_forecast = ets_aan %>%
forecast(h = test_obs)
accuracy(object = ets_aan_forecast, data = temps_or)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(AverageTemperature… Test 14.6 16.8 14.8 83.7 709. 9.26 8.11 0.877
ets_aan_forecast %>%
autoplot(test_or)
For the final model I evaluated trend() as “none.”
ets_ana = train_or %>%
model(ETS(AverageTemperature ~ error("A") + trend("N") + season("A")))
report(ets_ana)
## Series: AverageTemperature
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.004414556
## gamma = 0.003841693
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 8.056684 -8.040521 -4.680518 0.5146967 5.705081 9.378003 9.938094 6.3677
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 2.666623 -1.221867 -4.347748 -6.9048 -9.374743
##
## sigma^2: 2.2286
##
## AIC AICc BIC
## 12845.78 12846.09 12926.18
components(ets_ana) %>%
autoplot()
# Forecast
ets_ana_forecast = ets_ana %>%
forecast(h = test_obs)
accuracy(object = ets_ana_forecast, data = temps_or)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(AverageTemperature… Test 0.376 1.55 1.23 -8.18 85.4 0.775 0.747 0.188
ets_ana_forecast %>%
autoplot(test_or)