Forecast Exercises
# 1. After importing the data:
# Now, I am filtering the data to only contain
# data from 2010:
gas <- Weekly_gas %>%
filter(year(`Week of`) >= 2010) %>%
mutate(Week = yearweek(`Week of`)) %>%
as_tsibble(index = Week) %>%
select(Week, `Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`)
gas
## # A tsibble: 666 x 2 [1W]
## Week Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dol…¹
## <week> <dbl>
## 1 2010 W01 2.72
## 2 2010 W02 2.80
## 3 2010 W03 2.79
## 4 2010 W04 2.76
## 5 2010 W05 2.72
## 6 2010 W06 2.71
## 7 2010 W07 2.66
## 8 2010 W08 2.71
## 9 2010 W09 2.76
## 10 2010 W10 2.80
## # … with 656 more rows, and abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
## # ℹ Use `print(n = ...)` to see more rows
View(gas)
# Now, I am plotting it:
autoplot(gas) +
labs(y = "Price (dollars per gallon)", title = "Weekly US Gas Prices")
## Plot variable not specified, automatically selected `.vars = Weekly U.S. All
## Grades All Formulations Retail Gasoline Prices Dollars per Gallon`

# 2.
# I am finding the value of lambda for my
# appropriate box_cox transformation
gas1 <- gas %>%
features(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`, features = guerrero)
gas1
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 -0.330
# Thus, the variance is now more centered:
gas %>%
autoplot(box_cox(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`, -0.330)) +
labs(y = "Box-Cox transformed Prices")

# Now, I am using STL decomposition to see the different
# components of the additive form.
gas3 <- gas %>%
model(classical_decomposition(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`, type = "additive")) %>%
components() %>%
autoplot()
gas3
## Warning: Removed 26 row(s) containing missing values (geom_path).

# The remainders seem to be seasonal, so this is an issue.
# Now, I want to plot the seasonally adjusted data with the
# original data
gas4 <- gas %>%
model(classical_decomposition(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`, type = "additive")) %>%
components() %>%
select(season_adjust)
View(gas4)
autoplot(gas) +
autolayer(gas4, color = "red")
## Plot variable not specified, automatically selected `.vars = Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
## Plot variable not specified, automatically selected `.vars = season_adjust`

# Now I am going to employ the MA to find an estimate of the
# trend-cycle with m = 52 to denote that it is weekly data with
# annual seasonality.
gas2 <- gas %>%
mutate('52-MA' = slider::slide_dbl(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`, mean, .before = 26, .after = 25, .complete = TRUE),
'2x52-MA' = slider::slide_dbl(`52-MA`, mean, .before = 1, .after = 0, .complete = TRUE))
mean_gas <- gas2 %>%
select('2x52-MA')
View(mean_gas)
autoplot(gas) +
autolayer(mean_gas, color = "red")
## Plot variable not specified, automatically selected `.vars = Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
## Plot variable not specified, automatically selected `.vars = 2x52-MA`
## Warning: Removed 52 row(s) containing missing values (geom_path).

# This is a depiction of the trend-cycle.
# We can notice by the graphs that there is a instant decline
# in gas prices around 2020 which depicts the start of COVID-19
# when people started to stay home more often. The instant
# spike is from a couple of factors, for example, people started
# coming out from quarentine and Russia cut off all pipeline
# gas to Western countries who imposed sanctions.
# 3.
# For the following, h = 2, since I am forecasting 2 weeks in
# advance (2 observations)
# I am starting with the naive method:
naive <- gas %>%
model(Naive = NAIVE(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`)) %>%
forecast(h = 2)
naive
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. All Grades All Formulations Retail Gasolin…¹ .mean
## <chr> <week> <dist> <dbl>
## 1 Naive 2022 W41 N(3.9, 0.0031) 3.91
## 2 Naive 2022 W42 N(3.9, 0.0062) 3.91
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# The two future values are 3.91 which is the last value of the data
drift <- gas %>%
model(Drift = RW(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ drift())) %>%
forecast(h = 2)
drift
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. All Grades All Formulations Retail Gasolin…¹ .mean
## <chr> <week> <dist> <dbl>
## 1 Drift 2022 W41 N(3.9, 0.0031) 3.91
## 2 Drift 2022 W42 N(3.9, 0.0062) 3.91
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# The two future values are still 3.91 which is weird since it should
# have increased by the average slope of the historical data.
mean <- gas %>%
model(mean = MEAN(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`)) %>%
forecast(h = 2)
mean
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. All Grades All Formulations Retail Gasolin…¹ .mean
## <chr> <week> <dist> <dbl>
## 1 mean 2022 W41 N(3, 0.38) 3.01
## 2 mean 2022 W42 N(3, 0.38) 3.01
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# These two values are the mean of the historical data which is 3.01
snaive <- gas %>%
model(seasonal_naive = SNAIVE(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ lag(52))) %>%
forecast(h = 2)
snaive
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. All Grades All Formulations Retail…¹ .mean
## <chr> <week> <dist> <dbl>
## 1 seasonal_naive 2022 W41 N(3.4, 0.38) 3.36
## 2 seasonal_naive 2022 W42 N(3.4, 0.38) 3.42
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# The two values denote the seasonal naive. I feel like this will be the most
# appropriate method since the data seems to be seasonal
# 4.
# To decide which is most accurate, I am going to need a training set
# and test data.
train <- round(.70*row_number(gas$Week))
train
## [1] 1 1 2 3 4 4 5 6 6 7 8 8 9 10 10 11 12 13
## [19] 13 14 15 15 16 17 18 18 19 20 20 21 22 22 23 24 24 25
## [37] 26 27 27 28 29 29 30 31 31 32 33 34 34 35 36 36 37 38
## [55] 38 39 40 41 41 42 43 43 44 45 46 46 47 48 48 49 50 50
## [73] 51 52 52 53 54 55 55 56 57 57 58 59 59 60 61 62 62 63
## [91] 64 64 65 66 66 67 68 69 69 70 71 71 72 73 74 74 75 76
## [109] 76 77 78 78 79 80 80 81 82 83 83 84 85 85 86 87 88 88
## [127] 89 90 90 91 92 92 93 94 94 95 96 97 97 98 99 99 100 101
## [145] 102 102 103 104 104 105 106 106 107 108 108 109 110 111 111 112 113 113
## [163] 114 115 115 116 117 118 118 119 120 120 121 122 122 123 124 125 125 126
## [181] 127 127 128 129 130 130 131 132 132 133 134 134 135 136 136 137 138 139
## [199] 139 140 141 141 142 143 144 144 145 146 146 147 148 148 149 150 150 151
## [217] 152 153 153 154 155 155 156 157 158 158 159 160 160 161 162 162 163 164
## [235] 164 165 166 167 167 168 169 169 170 171 172 172 173 174 174 175 176 176
## [253] 177 178 178 179 180 181 181 182 183 183 184 185 186 186 187 188 188 189
## [271] 190 190 191 192 192 193 194 195 195 196 197 197 198 199 200 200 201 202
## [289] 202 203 204 204 205 206 206 207 208 209 209 210 211 211 212 213 214 214
## [307] 215 216 216 217 218 218 219 220 220 221 222 223 223 224 225 225 226 227
## [325] 227 228 229 230 230 231 232 232 233 234 234 235 236 237 237 238 239 239
## [343] 240 241 241 242 243 244 244 245 246 246 247 248 248 249 250 251 251 252
## [361] 253 253 254 255 255 256 257 258 258 259 260 260 261 262 262 263 264 265
## [379] 265 266 267 267 268 269 270 270 271 272 272 273 274 274 275 276 276 277
## [397] 278 279 279 280 281 281 282 283 284 284 285 286 286 287 288 288 289 290
## [415] 290 291 292 293 293 294 295 295 296 297 298 298 299 300 300 301 302 302
## [433] 303 304 304 305 306 307 307 308 309 309 310 311 312 312 313 314 314 315
## [451] 316 316 317 318 318 319 320 321 321 322 323 323 324 325 326 326 327 328
## [469] 328 329 330 330 331 332 332 333 334 335 335 336 337 337 338 339 340 340
## [487] 341 342 342 343 344 344 345 346 346 347 348 349 349 350 351 351 352 353
## [505] 354 354 355 356 356 357 358 358 359 360 360 361 362 363 363 364 365 365
## [523] 366 367 368 368 369 370 370 371 372 372 373 374 374 375 376 377 377 378
## [541] 379 379 380 381 382 382 383 384 384 385 386 386 387 388 388 389 390 391
## [559] 391 392 393 393 394 395 396 396 397 398 398 399 400 400 401 402 402 403
## [577] 404 405 405 406 407 407 408 409 410 410 411 412 412 413 414 414 415 416
## [595] 416 417 418 419 419 420 421 421 422 423 424 424 425 426 426 427 428 428
## [613] 429 430 430 431 432 433 433 434 435 435 436 437 438 438 439 440 440 441
## [631] 442 442 443 444 444 445 446 447 447 448 449 449 450 451 451 452 453 454
## [649] 454 455 456 456 457 458 458 459 460 461 461 462 463 463 464 465 465 466
week_train <- 666 - train
week_train
## [1] 665 665 664 663 662 662 661 660 660 659 658 658 657 656 656 655 654 653
## [19] 653 652 651 651 650 649 648 648 647 646 646 645 644 644 643 642 642 641
## [37] 640 639 639 638 637 637 636 635 635 634 633 632 632 631 630 630 629 628
## [55] 628 627 626 625 625 624 623 623 622 621 620 620 619 618 618 617 616 616
## [73] 615 614 614 613 612 611 611 610 609 609 608 607 607 606 605 604 604 603
## [91] 602 602 601 600 600 599 598 597 597 596 595 595 594 593 592 592 591 590
## [109] 590 589 588 588 587 586 586 585 584 583 583 582 581 581 580 579 578 578
## [127] 577 576 576 575 574 574 573 572 572 571 570 569 569 568 567 567 566 565
## [145] 564 564 563 562 562 561 560 560 559 558 558 557 556 555 555 554 553 553
## [163] 552 551 551 550 549 548 548 547 546 546 545 544 544 543 542 541 541 540
## [181] 539 539 538 537 536 536 535 534 534 533 532 532 531 530 530 529 528 527
## [199] 527 526 525 525 524 523 522 522 521 520 520 519 518 518 517 516 516 515
## [217] 514 513 513 512 511 511 510 509 508 508 507 506 506 505 504 504 503 502
## [235] 502 501 500 499 499 498 497 497 496 495 494 494 493 492 492 491 490 490
## [253] 489 488 488 487 486 485 485 484 483 483 482 481 480 480 479 478 478 477
## [271] 476 476 475 474 474 473 472 471 471 470 469 469 468 467 466 466 465 464
## [289] 464 463 462 462 461 460 460 459 458 457 457 456 455 455 454 453 452 452
## [307] 451 450 450 449 448 448 447 446 446 445 444 443 443 442 441 441 440 439
## [325] 439 438 437 436 436 435 434 434 433 432 432 431 430 429 429 428 427 427
## [343] 426 425 425 424 423 422 422 421 420 420 419 418 418 417 416 415 415 414
## [361] 413 413 412 411 411 410 409 408 408 407 406 406 405 404 404 403 402 401
## [379] 401 400 399 399 398 397 396 396 395 394 394 393 392 392 391 390 390 389
## [397] 388 387 387 386 385 385 384 383 382 382 381 380 380 379 378 378 377 376
## [415] 376 375 374 373 373 372 371 371 370 369 368 368 367 366 366 365 364 364
## [433] 363 362 362 361 360 359 359 358 357 357 356 355 354 354 353 352 352 351
## [451] 350 350 349 348 348 347 346 345 345 344 343 343 342 341 340 340 339 338
## [469] 338 337 336 336 335 334 334 333 332 331 331 330 329 329 328 327 326 326
## [487] 325 324 324 323 322 322 321 320 320 319 318 317 317 316 315 315 314 313
## [505] 312 312 311 310 310 309 308 308 307 306 306 305 304 303 303 302 301 301
## [523] 300 299 298 298 297 296 296 295 294 294 293 292 292 291 290 289 289 288
## [541] 287 287 286 285 284 284 283 282 282 281 280 280 279 278 278 277 276 275
## [559] 275 274 273 273 272 271 270 270 269 268 268 267 266 266 265 264 264 263
## [577] 262 261 261 260 259 259 258 257 256 256 255 254 254 253 252 252 251 250
## [595] 250 249 248 247 247 246 245 245 244 243 242 242 241 240 240 239 238 238
## [613] 237 236 236 235 234 233 233 232 231 231 230 229 228 228 227 226 226 225
## [631] 224 224 223 222 222 221 220 219 219 218 217 217 216 215 215 214 213 212
## [649] 212 211 210 210 209 208 208 207 206 205 205 204 203 203 202 201 201 200
# So, the train data is about 466 weeks (up until about 2015)
gas_train <- gas %>%
filter(year(Week) <= 2015)
recent_train <- gas %>%
filter(year(Week) >= 2010)
# Now, I am finding all the forecasts based on the training data set.
# Snaive:
snaive_fc <- gas_train %>%
model(SNAIVE(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ lag(52))) %>%
forecast(h = 2)
# With accuracy of
accuracy(snaive_fc, recent_train)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(`Weekly U.S. A… Test -0.151 0.152 0.151 -7.09 7.09 0.338 0.263 -0.5
# Drift:
drift_fc <- gas_train %>%
model(RW(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ drift())) %>%
forecast(h = 2)
drift_fc
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. …¹ .mean
## <chr> <week> <dist> <dbl>
## 1 "RW(`Weekly U.S. All Grades All Formulations Re… 2016 W01 N(2.1, 0.0028) 2.14
## 2 "RW(`Weekly U.S. All Grades All Formulations Re… 2016 W02 N(2.1, 0.0057) 2.14
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# with accuracy of
accuracy(drift_fc, recent_train)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "RW(`Weekly U.S.… Test -0.0187 0.0237 0.0187 -0.889 0.889 0.0421 0.0411 -0.5
# Naive:
naive_fc <- gas_train %>%
model(NAIVE(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`)) %>%
forecast(h = 2)
naive_fc
## # A fable: 2 x 4 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. …¹ .mean
## <chr> <week> <dist> <dbl>
## 1 NAIVE(`Weekly U.S. All Grades All Formulations … 2016 W01 N(2.1, 0.0028) 2.14
## 2 NAIVE(`Weekly U.S. All Grades All Formulations … 2016 W02 N(2.1, 0.0056) 2.14
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# with accuracy of
accuracy(naive_fc, recent_train)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NAIVE(`Weekly U.S… Test -0.0215 0.0265 0.0215 -1.02 1.02 0.0483 0.0459 -0.5
# Since drift has the lowest RMSE, it it considered to be the most appropriate method
# out of these three methods.
# 5.
# Let's create the training set by doing a stretch of 5, growing
# by 1 each step.
gas_stretch <- gas %>%
stretch_tsibble(.init = 5, .step = 1)
gas_stretch
## # A tsibble: 222,101 x 3 [1W]
## # Key: .id [662]
## Week Weekly U.S. All Grades All Formulations Retail Gasoline Pric…¹ .id
## <week> <dbl> <int>
## 1 2010 W01 2.72 1
## 2 2010 W02 2.80 1
## 3 2010 W03 2.79 1
## 4 2010 W04 2.76 1
## 5 2010 W05 2.72 1
## 6 2010 W01 2.72 2
## 7 2010 W02 2.80 2
## 8 2010 W03 2.79 2
## 9 2010 W04 2.76 2
## 10 2010 W05 2.72 2
## # … with 222,091 more rows, and abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
## # ℹ Use `print(n = ...)` to see more rows
# Now, I am estimating with the drift since it was the method with the lowest RMSE.
fit_gas <- gas_stretch %>%
model(RW(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ drift()))
fit_gas
## # A mable: 662 x 2
## # Key: .id [662]
## .id RW(\Weekly U.S. All Grades All Formulations Retail Gasoline Prices Do…¹
## <int> <model>
## 1 1 <RW w/ drift>
## 2 2 <RW w/ drift>
## 3 3 <RW w/ drift>
## 4 4 <RW w/ drift>
## 5 5 <RW w/ drift>
## 6 6 <RW w/ drift>
## 7 7 <RW w/ drift>
## 8 8 <RW w/ drift>
## 9 9 <RW w/ drift>
## 10 10 <RW w/ drift>
## # … with 652 more rows, and abbreviated variable name
## # ¹`RW(\`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon\` ~ \n drift())`
## # ℹ Use `print(n = ...)` to see more rows
# To then produce one step ahead forecasts
gas_cv <- fit_gas %>%
forecast(h = 1)
gas_cv
## # A fable: 662 x 5 [1W]
## # Key: .id, .model [662]
## .id .model Week Weekly U.S. …¹ .mean
## <int> <chr> <week> <dist> <dbl>
## 1 1 "RW(`Weekly U.S. All Grades All Formulat… 2010 W06 N(2.7, 0.0035) 2.72
## 2 2 "RW(`Weekly U.S. All Grades All Formulat… 2010 W07 N(2.7, 0.0026) 2.70
## 3 3 "RW(`Weekly U.S. All Grades All Formulat… 2010 W08 N(2.7, 0.0024) 2.66
## 4 4 "RW(`Weekly U.S. All Grades All Formulat… 2010 W09 N(2.7, 0.0024) 2.71
## 5 5 "RW(`Weekly U.S. All Grades All Formulat… 2010 W10 N(2.8, 0.0024) 2.76
## 6 6 "RW(`Weekly U.S. All Grades All Formulat… 2010 W11 N(2.8, 0.0023) 2.81
## 7 7 "RW(`Weekly U.S. All Grades All Formulat… 2010 W12 N(2.9, 0.0021) 2.85
## 8 8 "RW(`Weekly U.S. All Grades All Formulat… 2010 W13 N(2.9, 0.0019) 2.88
## 9 9 "RW(`Weekly U.S. All Grades All Formulat… 2010 W14 N(2.9, 0.0018) 2.86
## 10 10 "RW(`Weekly U.S. All Grades All Formulat… 2010 W15 N(2.9, 0.0017) 2.89
## # … with 652 more rows, and abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
## # ℹ Use `print(n = ...)` to see more rows
# Cross_validated accuracy:
gas_cv %>%
accuracy(gas)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 W41
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "RW(`Weekly U.… Test -5.93e-4 0.0559 0.0397 -0.0240 1.32 0.0838 0.0908 0.591
# Now, I am comparing this accuracy measure to my drift method forecast
accuracy(drift_fc, recent_train)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "RW(`Weekly U.S.… Test -0.0187 0.0237 0.0187 -0.889 0.889 0.0421 0.0411 -0.5
# It seems that drift method still remains to have the lowest RMSE.
# Therefore, it is considered to be our most accurate model.
# 6.
# Based on my analysis, I will estimate and plot the point
# forecast and 80% PI using the snaive method.
pred_int <- drift_fc %>%
hilo(level = 80)
pred_int
## # A tsibble: 2 x 5 [1W]
## # Key: .model [1]
## .model Week Weekly U.S. …¹ .mean `80%`
## <chr> <week> <dist> <dbl> <hilo>
## 1 "RW(`Weekly U.S. All Gra… 2016 W01 N(2.1, 0.0028) 2.14 [2.071141, 2.207161]80
## 2 "RW(`Weekly U.S. All Gra… 2016 W02 N(2.1, 0.0057) 2.14 [2.040967, 2.233636]80
## # … with abbreviated variable name
## # ¹`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon`
# The point forecast is the mean which is 2.14 while the
# prediction intervals can be seen from the graph.
# I am plottig the drift method with the prediction interval.
# I chose h = 52 to be able to see the prediction interval a year
# ahead (plot looked better).
gas %>%
model(RW(`Weekly U.S. All Grades All Formulations Retail Gasoline Prices Dollars per Gallon` ~ drift())) %>%
forecast(h = 52) %>%
autoplot(recent_train)+
labs(y = "Price", title = "Weekly US gas")
