library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ stringr 1.4.0
## ✓ tidyr 1.1.4 ✓ forcats 0.5.1
## ✓ readr 2.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
library(mltools)
##
## Attaching package: 'mltools'
## The following object is masked from 'package:tidyr':
##
## replace_na
library(tidyr)
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
library(tseries)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
library(sjPlot)
library(sjmisc)
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:mltools':
##
## replace_na
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
library(sjlabelled)
##
## Attaching package: 'sjlabelled'
## The following object is masked from 'package:forcats':
##
## as_factor
## The following object is masked from 'package:ggplot2':
##
## as_label
## The following object is masked from 'package:dplyr':
##
## as_label
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:sjlabelled':
##
## as_character, as_label
## The following object is masked from 'package:sjmisc':
##
## is_empty
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
Fuel_prices_data <- read_excel("/Users/nishanthreddy/Downloads/Fuel_prices_data.xlsx")
head(Fuel_prices_data)
## # A tibble: 6 × 2
## Week Price
## <dttm> <dbl>
## 1 2022-01-17 00:00:00 3.40
## 2 2022-01-10 00:00:00 3.39
## 3 2022-01-03 00:00:00 3.38
## 4 2021-12-27 00:00:00 3.38
## 5 2021-12-20 00:00:00 3.40
## 6 2021-12-13 00:00:00 3.41
summary(Fuel_prices_data)
## Week Price
## Min. :1993-04-05 00:00:00 Min. :0.949
## 1st Qu.:2000-06-15 12:00:00 1st Qu.:1.329
## Median :2007-08-27 00:00:00 Median :2.284
## Mean :2007-08-27 00:00:00 Mean :2.242
## 3rd Qu.:2014-11-06 12:00:00 3rd Qu.:2.913
## Max. :2022-01-17 00:00:00 Max. :4.165
Fuel_data <- Fuel_prices_data %>%
filter(Week >= ymd("2000=01-01")) %>% rename(Year = "Week",Avg_price ='Price') %>%
mutate(Month_yr = format(Year, "%Y-%m")) %>% distinct(Month_yr,.keep_all=TRUE)
as_tibble(Fuel_data)
## # A tibble: 265 × 3
## Year Avg_price Month_yr
## <dttm> <dbl> <chr>
## 1 2022-01-17 00:00:00 3.40 2022-01
## 2 2021-12-27 00:00:00 3.38 2021-12
## 3 2021-11-29 00:00:00 3.48 2021-11
## 4 2021-10-25 00:00:00 3.48 2021-10
## 5 2021-09-27 00:00:00 3.27 2021-09
## 6 2021-08-30 00:00:00 3.24 2021-08
## 7 2021-07-26 00:00:00 3.23 2021-07
## 8 2021-06-28 00:00:00 3.18 2021-06
## 9 2021-05-31 00:00:00 3.12 2021-05
## 10 2021-04-26 00:00:00 2.96 2021-04
## # … with 255 more rows
##Prophet Model
prophet_fueldata = Fuel_data %>%
rename(ds = Year,
y = Avg_price)
train = prophet_fueldata %>%
filter(ds<ymd("2015-01-01"))
test = prophet_fueldata %>%
filter(ds>=ymd("2015-01-01"))
model = prophet(train)
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future = make_future_dataframe(model,periods = 85, freq = 'month')
forecast = predict(model,future)
plot(model,forecast)+
ylab("Average Fuel Price")+xlab("Date")+labs(title = 'Forecast: Average Fuel Price',subtitle = 'January 2000 - December 2021')+theme_bw()
prophet_plot_components(model,forecast)
##Finding Saturation points ### After finding the summaries of both the test and train data, we need to find the saturation points which means the threshold of the price which is reasonable as seen in the last 20 years of the data.
summary(train)
## ds y Month_yr
## Min. :2000-01-31 00:00:00 Min. :1.137 Length:180
## 1st Qu.:2003-10-20 00:00:00 1st Qu.:1.686 Class :character
## Median :2007-07-12 12:00:00 Median :2.654 Mode :character
## Mean :2007-07-13 07:36:00 Mean :2.560
## 3rd Qu.:2011-04-04 00:00:00 3rd Qu.:3.373
## Max. :2014-12-29 00:00:00 Max. :4.146
summary(test)
## ds y Month_yr
## Min. :2015-01-26 00:00:00 Min. :1.870 Length:85
## 1st Qu.:2016-10-31 00:00:00 1st Qu.:2.341 Class :character
## Median :2018-07-30 00:00:00 Median :2.589 Mode :character
## Mean :2018-07-28 02:32:28 Mean :2.604
## 3rd Qu.:2020-04-27 00:00:00 3rd Qu.:2.857
## Max. :2022-01-17 00:00:00 Max. :3.478
#setting floor for train data
train$floor = 1.13
train$cap = 4.14
#setting floor for forecast data
future$floor = 1.8
future$cap = 3.4
sat_model = prophet(train,growth='logistic')
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
sat_forecast = predict(sat_model,future)
plot(sat_model,sat_forecast)+ylim(0,4.5)+xlab("Date")+ylab("Avg Price")+labs(title = 'Forecast: Average Price of Fuel ',subtitle = 'Saturation Points')+theme_bw()
### We set these saturation thresholds for the prophet model, and the resulting figure shows that the predicted values are substantially different from the previous forecast, and that the increasing trend is not as sharp as it was before the limitations were established.
##Examination for Change points
model = prophet(train, n.changepoints = 10)
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future = make_future_dataframe(model,periods = 85, freq = 'month')
plot(model,forecast)+add_changepoints_to_plot(model)+xlab("Date")+ylab("Avg Price")+labs(title = 'Forecast: Average Price of Prices',subtitle = 'January 2000 - December 2021')+theme_bw()
##Chekcing the Type of Seasonilty
additive = prophet(train)
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
add_fcst = predict(additive,future)
plot(additive,add_fcst)+
ylim(0,5)+xlab("Date")+ylab("Avg Price")+labs(title = 'Forecast: Average Price of Fuel',subtitle = 'Additive Seasonality Model')+theme_bw()
prophet_plot_components(additive,add_fcst)
multi = prophet(train,seasonality.mode = 'multiplicative')
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
multi_fcst = predict(multi,future)
plot(multi,multi_fcst)+ylim(0,5) +theme_bw()+xlab("Date")+ylab("Avg Price")+labs(title = 'Forecast: Average Price of Fuel',subtitle = 'Multiplicative Seasonality Model')
prophet_plot_components(multi,multi_fcst)
forecast_metric_data = forecast %>%
as_tibble() %>%
filter(ds>=ymd("2015-01-01"))
## Warning in base::check_tzones(e1, e2): 'tzone' attributes are inconsistent
RMSE = sqrt(mean((test$y - forecast_metric_data$yhat)^2))
MAE = mean(abs(test$y - forecast_metric_data$yhat))
MAPE = mean(abs((test$y - forecast_metric_data$yhat)/test$y))
print(paste("RMSE:",round(RMSE,2)))
## [1] "RMSE: 1.85"
print(paste("MAE:",round(MAE,2)))
## [1] "MAE: 1.76"
print(paste("MAPE:",round(MAPE,2)))
## [1] "MAPE: 0.71"
df.cv <- cross_validation(model ,horizon=365,period = 365,initial = 5*365,units='days')
## Making 9 forecasts with cutoffs between 2005-12-31 and 2013-12-29
head(df.cv)
## # A tibble: 6 × 6
## y ds yhat yhat_lower yhat_upper cutoff
## <dbl> <dttm> <dbl> <dbl> <dbl> <dttm>
## 1 2.40 2006-01-30 00:00:00 2.34 2.21 2.47 2005-12-31 00:00:00
## 2 2.30 2006-02-27 00:00:00 2.40 2.27 2.52 2005-12-31 00:00:00
## 3 2.54 2006-03-27 00:00:00 2.53 2.40 2.66 2005-12-31 00:00:00
## 4 2.96 2006-04-24 00:00:00 2.52 2.39 2.66 2005-12-31 00:00:00
## 5 2.91 2006-05-29 00:00:00 2.66 2.53 2.79 2005-12-31 00:00:00
## 6 2.91 2006-06-26 00:00:00 2.68 2.55 2.82 2005-12-31 00:00:00
df.cv %>%
ggplot()+
geom_point(aes(ds,y)) +
geom_point(aes(ds,yhat,color=factor(cutoff)))+
theme_bw()+
xlab("Date")+
ylab("Avg Price")+
scale_color_discrete(name = 'Cutoff')+
theme(legend.position = "None")+
labs(
title = 'Forecast: Average Price of Fuel',
subtitle = 'Cross Validation'
)
plot_cross_validation_metric(df.cv, metric = 'rmse')
plot_cross_validation_metric(df.cv, metric = 'mape')