library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.1 ✔ fabletools 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
#Natural Gas Consumption data from EIA
mydata=read.csv("C:/Users/lfult/Documents/_Courses/Boston College/Predictive analytics/naturalgas.csv")
mydata$Year=seq(as.Date("1973/1/1"), as.Date("2020/12/1"), "months")
myts=mydata%>%
mutate(YearMonth = yearmonth(as.character(Year))) %>%
as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)
train=myts[1:468,]
test=myts[469:nrow(myts),]
lambda <- train %>%features(Amount, features = guerrero) %>%pull(lambda_guerrero)
train$Amount=train$Amount^lambda
test$Amount=test$Amount^lambda
fit=train |>model(trend_model = TSLM(Amount ~ trend()),
mean_model=MEAN(Amount),
drift_model=RW(Amount~drift()),
naive_model=NAIVE(Amount),
snaive_model=SNAIVE(Amount))
fit|>forecast(h=108)
## # A fable: 540 x 4 [1M]
## # Key: .model [5]
## .model YearMonth Amount .mean
## <chr> <mth> <dist> <dbl>
## 1 trend_model 2012 Jan N(0.19, 0.00031) 0.186
## 2 trend_model 2012 Feb N(0.19, 0.00031) 0.186
## 3 trend_model 2012 Mar N(0.19, 0.00031) 0.186
## 4 trend_model 2012 Apr N(0.19, 0.00031) 0.186
## 5 trend_model 2012 May N(0.19, 0.00031) 0.186
## 6 trend_model 2012 Jun N(0.19, 0.00031) 0.186
## 7 trend_model 2012 Jul N(0.19, 0.00031) 0.186
## 8 trend_model 2012 Aug N(0.19, 0.00031) 0.186
## 9 trend_model 2012 Sep N(0.19, 0.00031) 0.186
## 10 trend_model 2012 Oct N(0.19, 0.00031) 0.186
## # ℹ 530 more rows
fit|>forecast(h=108)|>autoplot(train)
myf=fit%>%forecast(h=108)
(tmp=accuracy(myf, test))
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model Test 0.0211 0.0291 0.0230 10.3 11.4 NaN NaN 0.835
## 2 mean_model Test 0.00208 0.0200 0.0183 -0.0360 9.85 NaN NaN 0.835
## 3 naive_model Test 0.0216 0.0294 0.0232 10.6 11.6 NaN NaN 0.835
## 4 snaive_model Test 0.000670 0.00294 0.00240 0.304 1.32 NaN NaN 0.427
## 5 trend_model Test 0.000252 0.0199 0.0183 -1.02 9.91 NaN NaN 0.835
dcmp <- train%>%
model(stl = STL(Amount))
components(dcmp)
## # A dable: 468 x 7 [1M]
## # Key: .model [1]
## # : Amount = trend + season_year + remainder
## .model YearMonth Amount trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl 1973 Jan 0.161 0.182 -0.0214 0.000125 0.182
## 2 stl 1973 Feb 0.163 0.182 -0.0197 0.000949 0.183
## 3 stl 1973 Mar 0.166 0.182 -0.0153 -0.000397 0.182
## 4 stl 1973 Apr 0.174 0.182 -0.00868 0.000471 0.183
## 5 stl 1973 May 0.182 0.182 0.00159 -0.00137 0.181
## 6 stl 1973 Jun 0.194 0.182 0.0122 -0.000610 0.182
## 7 stl 1973 Jul 0.202 0.182 0.0188 0.00121 0.184
## 8 stl 1973 Aug 0.204 0.182 0.0222 -0.000831 0.182
## 9 stl 1973 Sep 0.201 0.183 0.0202 -0.00192 0.181
## 10 stl 1973 Oct 0.194 0.183 0.00949 0.00210 0.185
## # ℹ 458 more rows
components(dcmp) %>% as_tsibble() %>%
autoplot(Amount, colour="black") +
geom_line(aes(y=season_year+trend), colour = "#D55E00", line_type='dotted') +
labs(y = "Amount", title = "NG Consumption")
## Warning in geom_line(aes(y = season_year + trend), colour = "#D55E00",
## line_type = "dotted"): Ignoring unknown parameters: `line_type`
autoplot(components(dcmp))
# Forecasting with STL
fit_dcmp <- train |>
model(stlf_model = decomposition_model(
STL(Amount ~ trend(window = 7), robust = TRUE),
NAIVE(season_adjust)
))
fit_dcmp |>forecast(h=108) |>autoplot(train)
myf2=fit_dcmp%>%forecast(h=108)
(tmp=rbind(tmp,accuracy(myf2, test)))
## # A tibble: 6 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 drift_model Test 0.0211 0.0291 0.0230 10.3 11.4 NaN NaN 0.835
## 2 mean_model Test 0.00208 0.0200 0.0183 -0.0360 9.85 NaN NaN 0.835
## 3 naive_model Test 0.0216 0.0294 0.0232 10.6 11.6 NaN NaN 0.835
## 4 snaive_model Test 0.000670 0.00294 0.00240 0.304 1.32 NaN NaN 0.427
## 5 trend_model Test 0.000252 0.0199 0.0183 -1.02 9.91 NaN NaN 0.835
## 6 stlf_model Test 0.000906 0.00337 0.00275 0.399 1.49 NaN NaN 0.555
components(dcmp) %>% as_tsibble() %>%
autoplot(Amount, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(y = "Amount", title = "NG Consumption")
ma <- myts%>%filter(year(Year) >= 2000)%>%
mutate(`5-MA` = slider::slide_dbl(Amount, mean,
.before = 2, .after = 2, .complete = TRUE))
ma%>% autoplot(Amount) +
geom_line(aes(y = `5-MA`), colour = "#D55E00") +
labs(y = "Amount", title = "NG Consumption") +
guides(colour = guide_legend(title = "series"))
## Warning: Removed 4 rows containing missing values (`geom_line()`).
# Additive & Multiplicative Decomposition (Classical)
dcmp <- train%>%
model(add=classical_decomposition(Amount, type = "additive"),
mult=classical_decomposition(Amount, type="multiplicative"),
stl=STL(Amount))
components(dcmp)
## # A dable: 1,404 x 9 [1M]
## # Key: .model [3]
## # : Amount = trend + seasonal + random
## .model YearMonth Amount trend seasonal random season_adjust season_year
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 add 1973 Jan 0.161 NA -0.0240 NA 0.185 NA
## 2 add 1973 Feb 0.163 NA -0.0216 NA 0.185 NA
## 3 add 1973 Mar 0.166 NA -0.0171 NA 0.183 NA
## 4 add 1973 Apr 0.174 NA -0.00776 NA 0.182 NA
## 5 add 1973 May 0.182 NA 0.00485 NA 0.178 NA
## 6 add 1973 Jun 0.194 NA 0.0161 NA 0.178 NA
## 7 add 1973 Jul 0.202 0.182 0.0217 -0.00165 0.181 NA
## 8 add 1973 Aug 0.204 0.182 0.0240 -0.00261 0.180 NA
## 9 add 1973 Sep 0.201 0.183 0.0215 -0.00332 0.179 NA
## 10 add 1973 Oct 0.194 0.183 0.00799 0.00348 0.186 NA
## # ℹ 1,394 more rows
## # ℹ 1 more variable: remainder <dbl>
autoplot(components(dcmp))
## Warning: Removed 948 rows containing missing values (`geom_line()`).
add=components(dcmp)%>%filter(.model=='add')
mult=components(dcmp)%>%filter(.model=='mult')
stl=components(dcmp)%>%filter(.model=='stl')
trend1=mean(add$trend[457:462])
trend2=mean(mult$trend[457:462])
trend3=mean(stl$trend[457:462])
f1=rep(0, nrow(test))
f2=rep(0,nrow(test))
f3=rep(0,nrow(test))
j=0 #Month Counter
for (i in 1:nrow(test)){
j=j+1
if(j==13){j=1}
f1[i]=add$seasonal[j]+trend1
f2[i]=mult$seasonal[j]*trend2
f3[i]=stl$season_year[j]+trend3
}
test%>%autoplot()+geom_line(aes(y=f1, x=Year, col='red'))+
geom_line(aes(y=f2, x=Year, col='blue'))+
geom_line(aes(y=f3, x=Year, col='orange'))
## Plot variable not specified, automatically selected `.vars = Amount`
metrics = function(res, x){
RMSE=mean(sqrt(res^2)/length(res))
MAE=mean(abs(res)/length(res))
MAPE=mean(abs(res)/x)
ME=mean(res)
MPE=mean(res/x)
mylist=list(RMSE, MAE, MAPE, ME, MPE)
names(mylist)=c('RMSE','MAE', 'MAPE', 'ME', 'MPE')
return(mylist)
}
#residuals
res1=test$Amount-f1
res2=test$Amount-f2
res3=test$Amount-f3
#Generate the Metrics
tmp=cbind(metrics(f1,test$Amount), metrics(f2,test$Amount), metrics(f3,test$Amount))
colnames(tmp)=c('Add', 'Mult', 'STL')
tmp
## Add Mult STL
## RMSE 0.001719719 0.001719719 0.001720351
## MAE 0.001719719 0.001719719 0.001720351
## MAPE 0.9971803 0.9971163 0.998778
## ME 0.1857297 0.1857297 0.1857979
## MPE 0.9971803 0.9971163 0.998778