library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
library(mltools)
##
## Attaching package: 'mltools'
## The following object is masked from 'package:tidyr':
##
## replace_na
library(tidyr)
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
library(tseries)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
library(sjPlot)
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
library(sjmisc)
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:mltools':
##
## replace_na
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
library(sjlabelled)
##
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:dplyr':
##
## as_label
## The following object is masked from 'package:ggplot2':
##
## as_label
library(readxl)
Fuel_prices_data <- read_excel("/Users/nishanthreddy/Downloads/Fuel_prices_data.xlsx")
head(Fuel_prices_data)
## # A tibble: 6 × 2
## Week Price
## <dttm> <dbl>
## 1 2022-01-17 00:00:00 3.40
## 2 2022-01-10 00:00:00 3.39
## 3 2022-01-03 00:00:00 3.38
## 4 2021-12-27 00:00:00 3.38
## 5 2021-12-20 00:00:00 3.40
## 6 2021-12-13 00:00:00 3.41
str(Fuel_prices_data)
## tibble [1,503 × 2] (S3: tbl_df/tbl/data.frame)
## $ Week : POSIXct[1:1503], format: "2022-01-17" "2022-01-10" ...
## $ Price: num [1:1503] 3.4 3.39 3.38 3.38 3.4 ...
Fuel_data <- Fuel_prices_data[which(Fuel_prices_data$`Week` >= "2011-01-01" & Fuel_prices_data$`Week` <= "2021-12-31" ),]
#line chart
ggplot(data = Fuel_data, aes(x = Fuel_data$Week, y = Fuel_data$Price )) + theme_bw()+
geom_line() + labs(title = "Line Chart")
## Warning: Use of `Fuel_data$Week` is discouraged. Use `Week` instead.
## Warning: Use of `Fuel_data$Price` is discouraged. Use `Price` instead.
## Time series appears to be variance stationary through out the time frame. However, we’ll test the transformations to assure.
Fuel_Data_roll <- Fuel_data %>%
mutate(
Price_mean = zoo::rollmean(
Price,
k = 180,
fill = NA),
Price_sd = zoo::rollapply(
Price,
FUN = sd,
width = 180,
fill = NA)
)
Price_roll_mean <- Fuel_Data_roll %>%
ggplot() +
geom_line(aes(Week, Price_mean)) +
theme_bw() +
ggtitle("Fuel Prices Mean over Time (6 month rolling window)") +
ylab("Adjusted Fuel Price") +
xlab("Years")
Price_roll_sd <-Fuel_Data_roll %>%
ggplot() +
geom_line(aes(Week, Price_sd)) +
theme_bw() +
ggtitle("Differenced Fuel Prices SD over Time (6 month rolling window)") +
ylab("Adjusted Fuel Price") +
xlab("Years")
Price_roll_mean
## Warning: Removed 179 row(s) containing missing values (geom_path).
Price_roll_sd
## Warning: Removed 179 row(s) containing missing values (geom_path).
## ADF Test
print(adf.test(Fuel_data$Price))
##
## Augmented Dickey-Fuller Test
##
## data: Fuel_data$Price
## Dickey-Fuller = -3.4053, Lag order = 8, p-value = 0.05253
## alternative hypothesis: stationary
kpss.test(Fuel_data$Price, null="Trend")
## Warning in kpss.test(Fuel_data$Price, null = "Trend"): p-value smaller than
## printed p-value
##
## KPSS Test for Trend Stationarity
##
## data: Fuel_data$Price
## KPSS Trend = 0.95139, Truncation lag parameter = 6, p-value = 0.01
Fuel_data_log = Fuel_data %>%
mutate(Price_log = log1p(Fuel_data$Price))
Fuel_data_log %>%
ggplot()+
geom_line(aes(Week,Price_log ))+
theme_bw()+
ggtitle("Fuel Prices Over Time (Log)")+
ylab("Fuel Price (Log)")+
xlab("Years")
## Box-Cox Transformation
Fuel_Data_boxcox <- Fuel_data %>%
mutate(Price_boxcox = forecast::BoxCox(Fuel_data$Price,lambda='auto'))
Fuel_Data_boxcox %>%
ggplot()+
geom_line(aes(Week,Price_boxcox))+
theme_bw()+
ggtitle("Fuel Prices over Time (Box-Cox Transformation)")+
ylab(" Price (Box-Cox Tranformation)")+
xlab("Years")
## The time series after being log-transformed and Box-Cox transformed are extremely identical.
Fuel_diff <- Fuel_data_log %>%
mutate(Price_diff = Fuel_data$Price - lag(Fuel_data$Price))
Fuel_diff %>% select(Week, Price, Price_diff) %>%
head()
## # A tibble: 6 × 3
## Week Price Price_diff
## <dttm> <dbl> <dbl>
## 1 2021-12-27 00:00:00 3.38 NA
## 2 2021-12-20 00:00:00 3.40 0.0200
## 3 2021-12-13 00:00:00 3.41 0.0190
## 4 2021-12-06 00:00:00 3.44 0.0260
## 5 2021-11-29 00:00:00 3.48 0.0380
## 6 2021-11-22 00:00:00 3.49 0.0150
ggplot() + geom_line(aes(Fuel_diff$Week,Fuel_diff$Price_diff)) + theme_bw() + ggtitle("Fuel Price (First Difference)") + ylab("Price(Difference))") + xlab("Year")
## Warning: Removed 1 row(s) containing missing values (geom_path).
acf(Fuel_diff$Price)
## We can see that the time series has no seasonality in the ACF plot above. ## In the ACF plot, there is a Dampening effect. As a result, the time series is entirely an AR process.
pacf(Fuel_diff$Price)
## We can see from the PACF that there are two statistically significant autocorrelations. As a result, the order of the AR process is 2. ARIMA is given as (2,1,0).
Fuel_data_log_diff <- Fuel_data %>%
arrange(Fuel_data$Week) %>%
mutate(Price_log = log1p(Fuel_data$Price), Price_diff = Price - lag(Fuel_data$Price),
Price_log_diff = Price_log - lag(Price_log) ) %>%
drop_na()
Fuel_data_log_diff %>%
ggplot() + geom_line(aes(Fuel_data_log_diff$Week,Fuel_data_log_diff$Price_diff)) + theme_bw() + ggtitle("Fuel Price (First Difference)") + ylab("Price(Difference))") + xlab("Year")
## Warning: Use of `Fuel_data_log_diff$Week` is discouraged. Use `Week` instead.
## Warning: Use of `Fuel_data_log_diff$Price_diff` is discouraged. Use `Price_diff`
## instead.
## Re-verifying using the ADF Test
adf.test(Fuel_data_log_diff$Price)
##
## Augmented Dickey-Fuller Test
##
## data: Fuel_data_log_diff$Price
## Dickey-Fuller = -2.2758, Lag order = 8, p-value = 0.4615
## alternative hypothesis: stationary
adf.test(Fuel_data_log_diff$Price_diff)
##
## Augmented Dickey-Fuller Test
##
## data: Fuel_data_log_diff$Price_diff
## Dickey-Fuller = -3.175, Lag order = 8, p-value = 0.09224
## alternative hypothesis: stationary
adf.test(Fuel_data_log_diff$Price_log)
##
## Augmented Dickey-Fuller Test
##
## data: Fuel_data_log_diff$Price_log
## Dickey-Fuller = -3.409, Lag order = 8, p-value = 0.0519
## alternative hypothesis: stationary
adf.test(Fuel_data_log_diff$Price_log_diff)
## Warning in adf.test(Fuel_data_log_diff$Price_log_diff): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Fuel_data_log_diff$Price_log_diff
## Dickey-Fuller = -6.9354, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(Fuel_data_log_diff$Price)
## Warning in kpss.test(Fuel_data_log_diff$Price): p-value smaller than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: Fuel_data_log_diff$Price
## KPSS Level = 3.9025, Truncation lag parameter = 6, p-value = 0.01
.test(Fuel_data_log_diff$Price_diff)
kpss.test(Fuel_data_log_diff$Price_diff)
## Warning in kpss.test(Fuel_data_log_diff$Price_diff): p-value smaller than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: Fuel_data_log_diff$Price_diff
## KPSS Level = 6.0787, Truncation lag parameter = 6, p-value = 0.01
kpss.test(Fuel_data_log_diff$Price_log)
## Warning in kpss.test(Fuel_data_log_diff$Price_log): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: Fuel_data_log_diff$Price_log
## KPSS Level = 3.7162, Truncation lag parameter = 6, p-value = 0.01
kpss.test(Fuel_data_log_diff$Price_log_diff)
## Warning in kpss.test(Fuel_data_log_diff$Price_log_diff): p-value greater than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: Fuel_data_log_diff$Price_log_diff
## KPSS Level = 0.10982, Truncation lag parameter = 6, p-value = 0.1
##ARIMA
par(mfrow = c(1,2))
acf(Fuel_data_log_diff$Price_log_diff,lag.max=20)
pacf(Fuel_data_log_diff$Price_log_diff,lag.max=20)
## AIC of ARIMA Models
AIC(
arima(Fuel_data_log_diff$Price_log,order=c(0,1,1)),
arima(Fuel_data_log_diff$Price_log,order=c(0,1,2)),
arima(Fuel_data_log_diff$Price_log,order=c(0,1,3)),
arima(Fuel_data_log_diff$Price_log,order=c(1,1,0)),
arima(Fuel_data_log_diff$Price_log,order=c(1,1,1)),
arima(Fuel_data_log_diff$Price_log,order=c(2,1,0)),
arima(Fuel_data_log_diff$Price_log,order=c(3,1,0))
)
## df AIC
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 1)) 2 -3568.583
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 2)) 3 -3612.266
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 3)) 4 -3622.255
## arima(Fuel_data_log_diff$Price_log, order = c(1, 1, 0)) 2 -3636.575
## arima(Fuel_data_log_diff$Price_log, order = c(1, 1, 1)) 3 -3634.718
## arima(Fuel_data_log_diff$Price_log, order = c(2, 1, 0)) 3 -3634.702
## arima(Fuel_data_log_diff$Price_log, order = c(3, 1, 0)) 4 -3633.285
BIC(
arima(Fuel_data_log_diff$Price_log,order=c(0,1,1)),
arima(Fuel_data_log_diff$Price_log,order=c(0,1,2)),
arima(Fuel_data_log_diff$Price_log,order=c(0,1,3)),
arima(Fuel_data_log_diff$Price_log,order=c(1,1,0)),
arima(Fuel_data_log_diff$Price_log,order=c(1,1,1)),
arima(Fuel_data_log_diff$Price_log,order=c(2,1,0)),
arima(Fuel_data_log_diff$Price_log,order=c(3,1,0))
)
## df BIC
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 1)) 2 -3559.884
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 2)) 3 -3599.219
## arima(Fuel_data_log_diff$Price_log, order = c(0, 1, 3)) 4 -3604.859
## arima(Fuel_data_log_diff$Price_log, order = c(1, 1, 0)) 2 -3627.877
## arima(Fuel_data_log_diff$Price_log, order = c(1, 1, 1)) 3 -3621.670
## arima(Fuel_data_log_diff$Price_log, order = c(2, 1, 0)) 3 -3621.655
## arima(Fuel_data_log_diff$Price_log, order = c(3, 1, 0)) 4 -3615.888
auto.arima(Fuel_data_log_diff$Price_log,stationary=FALSE,allowdrift=FALSE,
seasonal=FALSE,stepwise=FALSE,approximation=FALSE)
## Series: Fuel_data_log_diff$Price_log
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## 0.6031
## s.e. 0.0332
##
## sigma^2 = 0.0001009: log likelihood = 1820.29
## AIC=-3636.58 AICc=-3636.55 BIC=-3627.88
Model_1 = arima(Fuel_data_log_diff$Price_log,order=c(1,1,0))
forecast::checkresiduals(Model_1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 7.5908, df = 9, p-value = 0.5758
##
## Model df: 1. Total lags used: 10
par(mfrow=c(1,2))
res = Model_1$resid
acf(res,lag.max=20)
pacf(res,lag.max=20)
## Predicting Model Performance using Forecast
plot(forecast(Model_1,h=150),main="ARIMA(1,1,0) Forecast",ylab="Fuel Price per Gallon ($)",xlab="Date")
prediction <- Arima(Fuel_data_log_diff$Price, model = Model_1)
accuracy(prediction)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0001401753 0.0390067 0.02871679 0.009395186 0.9943248 0.7846193
## ACF1
## Training set -0.006829676