Predictive Analytics - Discussion 2

Import Data and Build Initial Plots

# Read in the data
temps = read.csv(file.path(dirname(documentPath()), "global_climate_data.csv"))

# Filter down to Oregon
temps_or = temps %>% 
  filter(State == "Oregon") %>% 
  select(-c(Country, AverageTemperatureUncertainty, State))

# Convert to tsibble
temps_or = temps_or %>% 
  mutate(Month = yearmonth(dt)) %>% 
  select(-dt) %>% 
  as_tsibble(index = Month)

Reduce Data to Complete Set

There is a noticeable gap prior to 1850 so I filter all data before that period

# Filter to after 1850 to remove the NAs
temps_or = temps_or %>% 
  filter(year(Month) >= 1850)

# Confirm all NAs are gone
sum(is.na(temps_or$AverageTemperature))

## [1] 0

Model and Evaluate

We can see there are no more missing values in the data. No we will build our training and test sets to begin building and evaluating models.

# Split Training and Test Sets (80/20 Split)
total_obs = dim(temps_or)[1]
train_obs = total_obs * 0.8
test_obs = total_obs - train_obs
train_or = head(temps_or, train_obs)
test_or = tail(temps_or, test_obs)

ETS (A,A,A)

I let the ETS() function automatically pick the model based on AIC and it chose (A,A,A).

ets_auto = train_or %>% 
  model(ETS(AverageTemperature))

report(ets_auto)

## Series: AverageTemperature 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.05765157 
##     beta  = 0.0001000001 
##     gamma = 0.0001007599 
## 
##   Initial states:
##     l[0]         b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  8.07355 0.0002648677 -8.290387 -4.717011 0.3875213 5.577286 9.490004 10.18304
##     s[-6]    s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  6.414181 2.801691 -1.062102 -4.382604 -7.086436 -9.315185
## 
##   sigma^2:  2.2255
## 
##      AIC     AICc      BIC 
## 12845.58 12845.97 12936.70

components(ets_auto) %>% 
  autoplot()

# Forecast
ets_auto_forecast = ets_auto %>% 
  forecast(h = test_obs)

accuracy(object = ets_auto_forecast, data = temps_or)

## # A tibble: 1 × 10
##   .model                  .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(AverageTemperature) Test  0.0803  1.50  1.19 -7.38  75.9 0.746 0.725 0.199

ets_auto_forecast %>% 
  autoplot(test_or)

ETS (A,A,N)

For the next two models I wanted to see if there was significant importance weighted towards either trend or season. For this model I changed season() to “none.”

ets_aan = train_or %>% 
  model(ETS(AverageTemperature ~ error("A") + trend("A") + season("N")))

report(ets_aan)

## Series: AverageTemperature 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.006530937 
## 
##   Initial states:
##        l[0]     b[0]
##  -0.6570252 1.443454
## 
##   sigma^2:  16.0514
## 
##      AIC     AICc      BIC 
## 15939.63 15939.66 15966.43

components(ets_aan) %>% 
  autoplot()

# Forecast
ets_aan_forecast = ets_aan %>% 
  forecast(h = test_obs)

accuracy(object = ets_aan_forecast, data = temps_or)

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(AverageTemperature… Test   14.6  16.8  14.8  83.7  709.  9.26  8.11 0.877

ets_aan_forecast %>% 
  autoplot(test_or)

ETS (A,N,A)

For the final model I evaluated trend() as “none.”

ets_ana = train_or %>% 
  model(ETS(AverageTemperature ~ error("A") + trend("N") + season("A")))

report(ets_ana)

## Series: AverageTemperature 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.004414556 
##     gamma = 0.003841693 
## 
##   Initial states:
##      l[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]  s[-6]
##  8.056684 -8.040521 -4.680518 0.5146967 5.705081 9.378003 9.938094 6.3677
##     s[-7]     s[-8]     s[-9]  s[-10]    s[-11]
##  2.666623 -1.221867 -4.347748 -6.9048 -9.374743
## 
##   sigma^2:  2.2286
## 
##      AIC     AICc      BIC 
## 12845.78 12846.09 12926.18

components(ets_ana) %>% 
  autoplot()

# Forecast
ets_ana_forecast = ets_ana %>% 
  forecast(h = test_obs)

accuracy(object = ets_ana_forecast, data = temps_or)

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(AverageTemperature… Test  0.376  1.55  1.23 -8.18  85.4 0.775 0.747 0.188

ets_ana_forecast %>% 
  autoplot(test_or)