Data 624 project1

\[Libraries \]

library(readxl)
library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2

## Warning: package 'dplyr' was built under R version 4.3.2

## Warning: package 'tidyr' was built under R version 4.3.2

## Warning: package 'lubridate' was built under R version 4.3.2

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tsibble' was built under R version 4.3.3

## Warning: package 'tsibbledata' was built under R version 4.3.3

## Warning: package 'feasts' was built under R version 4.3.3

## Warning: package 'fabletools' was built under R version 4.3.3

## Warning: package 'fable' was built under R version 4.3.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(ggfortify)

## Warning: package 'ggfortify' was built under R version 4.3.3

library(DataExplorer)

## Warning: package 'DataExplorer' was built under R version 4.3.3

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.2

## Warning: package 'purrr' was built under R version 4.3.2

## Warning: package 'stringr' was built under R version 4.3.2

## Warning: package 'forcats' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gridExtra)

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## Registered S3 methods overwritten by 'forecast':
##   method                 from     
##   autoplot.Arima         ggfortify
##   autoplot.acf           ggfortify
##   autoplot.ar            ggfortify
##   autoplot.bats          ggfortify
##   autoplot.decomposed.ts ggfortify
##   autoplot.ets           ggfortify
##   autoplot.forecast      ggfortify
##   autoplot.stl           ggfortify
##   autoplot.ts            ggfortify
##   fitted.ar              ggfortify
##   fortify.ts             ggfortify
##   residuals.ar           ggfortify

library(dplyr)

\[PART A \] ## Load & prepare the data:

atm= read_excel("C:/Users/Chafiaa/Downloads/ATM624Data.xlsx")
head(atm)

## # A tibble: 6 × 3
##    DATE ATM    Cash
##   <dbl> <chr> <dbl>
## 1 39934 ATM1     96
## 2 39934 ATM2    107
## 3 39935 ATM1     82
## 4 39935 ATM2     89
## 5 39936 ATM1     85
## 6 39936 ATM2     90

print(dim(atm))

## [1] 1474    3

atm%>%
  filter(is.na(Cash) & !is.na(ATM))

## # A tibble: 5 × 3
##    DATE ATM    Cash
##   <dbl> <chr> <dbl>
## 1 39977 ATM1     NA
## 2 39980 ATM1     NA
## 3 39982 ATM2     NA
## 4 39986 ATM1     NA
## 5 39988 ATM2     NA

atm <- atm %>%
  filter(!is.na(Cash) | !is.na(ATM))

summary(atm)

##       DATE           ATM                 Cash        
##  Min.   :39934   Length:1460        Min.   :    0.0  
##  1st Qu.:40025   Class :character   1st Qu.:    0.5  
##  Median :40116   Mode  :character   Median :   73.0  
##  Mean   :40116                      Mean   :  155.6  
##  3rd Qu.:40207                      3rd Qu.:  114.0  
##  Max.   :40298                      Max.   :10919.8  
##                                     NA's   :5

To be able to work with this data w/o running to errors we need to turn Cash to integer.

atm <- atm %>%
  mutate(DATE = as_date(DATE), Cash = as.integer(Cash))
str(atm)

## tibble [1,460 × 3] (S3: tbl_df/tbl/data.frame)
##  $ DATE: Date[1:1460], format: "2079-05-03" "2079-05-03" ...
##  $ ATM : chr [1:1460] "ATM1" "ATM2" "ATM1" "ATM2" ...
##  $ Cash: int [1:1460] 96 107 82 89 85 90 90 55 99 79 ...

Exploring the Data:

let start with ploting the 4 MTAs:

atm1 <- atm %>%
  filter(ATM == "ATM1") %>%
  as_tsibble(index = DATE)


autoplot(atm1) +
  labs(title="ATM1", subtitle="Cash withdrawals per day", y="Hundreds USD")

## Plot variable not specified, automatically selected `.vars = Cash`

atm2 <- atm %>%
  filter(ATM == "ATM2") %>%
  as_tsibble(index = DATE)


autoplot(atm2) +
  labs(title="ATM2", subtitle="Cash withdrawals per day", y="Hundreds USD")

## Plot variable not specified, automatically selected `.vars = Cash`

atm3 <- atm %>%
  filter(ATM == "ATM3") %>%
  as_tsibble(index = DATE)


autoplot(atm3) +
  labs(title="ATM3", subtitle="Cash withdrawals per day", y="Hundreds USD")

## Plot variable not specified, automatically selected `.vars = Cash`

atm4 <- atm %>%
  filter(ATM == "ATM4") %>%
  as_tsibble(index = DATE)


autoplot(atm4) +
  labs(title="ATM4", subtitle="Cash withdrawals per day", y="Hundreds USD")

## Plot variable not specified, automatically selected `.vars = Cash`

## Based on our plots above, we can observe that “ATM1” and “ATM2” are time series that have constant variability, with no apparent trend and potential seasonality. However, “ATM3”seems to only have withdrawals for the last few days in the data & “ATM4” has a clear outlier.

which(atm4$Cash == 10919)

## [1] 285

The outlier is found on te row 285 of ATM4 time series.

atm4[285,3] <- round(mean(atm4$Cash),0) # considering 285 as an error we plot MTA4 again

autoplot(atm4) +
  labs(title="ATM4", subtitle="Cash withdrawals per day", y="Hundreds USD")

## Plot variable not specified, automatically selected `.vars = Cash`

missing values on the MTAs:

sum(is.na(atm1$Cash))

## [1] 3

sum(is.na(atm2$Cash))

## [1] 2

sum(is.na(atm3$Cash))

## [1] 0

sum(is.na(atm4$Cash))

## [1] 0

which rows missing values:

which(is.na(atm1$Cash))

## [1] 44 47 53

which(is.na(atm2$Cash))

## [1] 49 55

hist(atm1$Cash)

hist(atm2$Cash)

## We don’t see an evident skewness on the distribution of “Cash” for either ATM1 & 2. I’ll use median to input the values

atm1[44,3] <- round(median(atm1$Cash, na.rm=TRUE),0)
atm1[47,3] <- round(median(atm1$Cash, na.rm=TRUE),0)
atm1[53,3] <- round(median(atm1$Cash, na.rm=TRUE),0)
atm2[49,3] <- round(median(atm2$Cash, na.rm=TRUE),0)
atm2[55,3] <- round(median(atm2$Cash, na.rm=TRUE),0)

Box-Cox transformation for each series to help make them a little simpler:

lambda1 <- atm1%>%
  features(Cash,features = guerrero)%>%
  pull(lambda_guerrero)

plot_trans1 <- atm1 %>%
  autoplot(box_cox(Cash, lambda1)) +
  labs(title="ATM1 TRANSFORMED", subtitle="Cash withdrawals per day", y="USD")

lambda2 <- atm2%>%
  features(Cash,features = guerrero)%>%
  pull(lambda_guerrero)

plot_trans2 <- atm2 %>%
  autoplot(box_cox(Cash, lambda2)) +
  labs(title="ATM2 TRANSFORMED", subtitle="Cash withdrawals per day", y="USD")

lambda3 <- atm3%>%
  features(Cash,features = guerrero)%>%
  pull(lambda_guerrero)

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

plot_trans3 <- atm3 %>%
  autoplot(box_cox(Cash, lambda3)) +
  labs(title="ATM3 TRANSFORMED", subtitle="Cash withdrawals per day", y="USD")

lambda4 <- atm4%>%
  features(Cash,features = guerrero)%>%
  pull(lambda_guerrero)

plot_trans4 <- atm4 %>%
  autoplot(box_cox(Cash, lambda4)) +
  labs(title="ATM4 TRANSFORMED", subtitle="Cash withdrawals per day", y="USD")

grid.arrange(plot_trans1, plot_trans2, plot_trans3, plot_trans4, nrow = 2)

The transformations helped scale down the time series of all ATMs.

Build Model:

atm1%>%
  model(classical_decomposition(box_cox(Cash, lambda1), type="additive")) %>%
  components () %>%
  autoplot() + 
  labs(title="Classical additive decomposition of ATM1")

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

atm2%>%
  model(classical_decomposition(box_cox(Cash, lambda2), type="additive")) %>%
  components () %>%
  autoplot() + 
  labs(title="Classical additive decomposition of ATM2")

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

atm3%>%
  model(classical_decomposition(box_cox(Cash, lambda3), type="additive")) %>%
  components () %>%
  autoplot() + 
  labs(title="Classical additive decomposition of ATM3")

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

atm4%>%
  model(classical_decomposition(box_cox(Cash, lambda4), type="additive")) %>%
  components () %>%
  autoplot() + 
  labs(title="Classical additive decomposition of ATM4")

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

## From the plots we see a strong seasonal component for all ATMs.

plot_acf1 <- atm1 %>%
  ACF(box_cox(Cash, lambda1)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM1")

plot_acf2 <- atm2 %>%
  ACF(box_cox(Cash, lambda2)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM2")

plot_acf3 <- atm3 %>%
  ACF(box_cox(Cash, lambda3)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM3")

plot_acf4 <- atm4 %>%
  ACF(box_cox(Cash, lambda4)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM4")

grid.arrange(plot_acf1, plot_acf2, plot_acf3, plot_acf4, nrow = 2)

ACF plots show that we may need to apply unitroot_nsdiffs() to the daily cash withdrawals for each ATM in order to determine if we need any seasonal differencing by week.

atm1 %>%
  features(box_cox(Cash, lambda1), unitroot_nsdiffs)

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1

atm2 %>%
  features(box_cox(Cash, lambda2), unitroot_nsdiffs)

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1

atm3 %>%
  features(box_cox(Cash, lambda3), unitroot_nsdiffs)

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       0

atm4 %>%
  features(box_cox(Cash, lambda4), unitroot_nsdiffs)

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       0

We need to apply seasonal differencing to “ATM1” and “ATM2”

atm1 %>%
  features(difference(box_cox(Cash, lambda1), 7), unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      0

atm2 %>%
  features(difference(box_cox(Cash, lambda2), 7), unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      0

atm3 %>%
  features(box_cox(Cash, lambda3), unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      0

atm4 %>%
  features(box_cox(Cash, lambda4), unitroot_ndiffs)

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      0

No additional differencing seems to be needed

atm1 %>%
  ACF(difference(box_cox(Cash, lambda1), 7)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM1")

atm2 %>%
  ACF(difference(box_cox(Cash, lambda2), 7)) %>%
  autoplot() + labs(title="Autocorrelation of Cash ATM2")

## Differencing seems to have made the data look closer to white noise.

Considering that these data need differencing, we will use the ARIMA() model. For “ATM3” we will use the naive model.

atm1_fit <- atm1 %>%
  model(ARIMA(box_cox(Cash, lambda1)))

report(atm1_fit)

## Series: Cash 
## Model: ARIMA(0,0,2)(0,1,1)[7] 
## Transformation: box_cox(Cash, lambda1) 
## 
## Coefficients:
##          ma1      ma2     sma1
##       0.0994  -0.1141  -0.6456
## s.e.  0.0524   0.0527   0.0426
## 
## sigma^2 estimated as 1.576:  log likelihood=-589.77
## AIC=1187.54   AICc=1187.66   BIC=1203.07

atm1_fit %>%
  gg_tsresiduals()

atm2_fit <- atm2 %>%
  model(ARIMA(box_cox(Cash, lambda2)))

report(atm2_fit)

## Series: Cash 
## Model: ARIMA(2,0,2)(0,1,1)[7] 
## Transformation: box_cox(Cash, lambda2) 
## 
## Coefficients:
##           ar1      ar2    ma1     ma2     sma1
##       -0.4247  -0.9074  0.468  0.8004  -0.7262
## s.e.   0.0587   0.0438  0.089  0.0584   0.0409
## 
## sigma^2 estimated as 69.93:  log likelihood=-1267.92
## AIC=2547.85   AICc=2548.09   BIC=2571.13

atm2_fit %>%
  gg_tsresiduals()

atm3_fit <- atm3 %>%
  model(NAIVE(box_cox(Cash, lambda3)))

report(atm3_fit)

## Series: Cash 
## Model: NAIVE 
## Transformation: box_cox(Cash, lambda3) 
## 
## sigma^2: 396.8997

atm3_fit %>%
  gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

atm4_fit <- atm4 %>%
  model(ARIMA(box_cox(Cash, lambda4)))

report(atm4_fit)

## Series: Cash 
## Model: ARIMA(0,0,1)(2,0,0)[7] w/ mean 
## Transformation: box_cox(Cash, lambda4) 
## 
## Coefficients:
##          ma1    sar1    sar2  constant
##       0.0761  0.2091  0.1986   17.3297
## s.e.  0.0529  0.0517  0.0525    0.7524
## 
## sigma^2 estimated as 187.5:  log likelihood=-1471.55
## AIC=2953.1   AICc=2953.27   BIC=2972.6

atm4_fit %>%
  gg_tsresiduals()

All residuals, except for “ATM3”, look like they have constant variability, seem to be white noise and have approximately close to normal distributions.

Forecasts: Save the forecast of each ATM in its own .csv doc:

forecast_atm1 <- atm1_fit %>% forecast(h=31)
forecast_atm2 <- atm2_fit %>% forecast(h=31)
forecast_atm3 <- atm3_fit %>% forecast(h=31)
forecast_atm4 <- atm4_fit %>% forecast(h=31)

write.csv(forecast_atm1,"forecast_atm1.csv")
write.csv(forecast_atm2,"forecast_atm2.csv")
write.csv(forecast_atm3,"forecast_atm3.csv")
write.csv(forecast_atm4,"forecast_atm4.csv")

\[ PART B\] ## Load the Data:

RCFL= read_excel("C:/Users/Chafiaa/Downloads/ResidentialCustomerForecastLoad-624.xlsx")
head(RCFL)

## # A tibble: 6 × 3
##   CaseSequence `YYYY-MMM`     KWH
##          <dbl> <chr>        <dbl>
## 1          733 1998-Jan   6862583
## 2          734 1998-Feb   5838198
## 3          735 1998-Mar   5420658
## 4          736 1998-Apr   5010364
## 5          737 1998-May   4665377
## 6          738 1998-Jun   6467147

print(dim(RCFL))

## [1] 192   3

which(is.na(RCFL), arr.ind=TRUE)

##      row col
## [1,] 129   3

RCFL_data <- RCFL %>% rename(Date = 'YYYY-MMM')
RCFL_data <- RCFL_data %>%  mutate(Date = as.Date(paste0('01-', Date), '%d-%Y-%b'))
min(RCFL_data$KWH,na.rm = TRUE)

## [1] 770523

RCFL_2 <-ts(RCFL[, "KWH"], start = c(1998, 1), frequency = 12)
ggseasonplot(RCFL_2)+ggtitle('USAGE BY YEAR FOR RESIDENTIAL POWER')

RCFL_data<- RCFL_data[-c(129,151),]

#Get average by month
RCFL_data$Month <- months(RCFL_data$Date)
aggregate(KWH ~ Month, RCFL_data, mean)

##        Month     KWH
## 1      April 5299734
## 2     August 8298211
## 3   December 6283175
## 4   February 6946556
## 5    January 8007422
## 6       July 7852521
## 7       June 6536092
## 8      March 5971450
## 9        May 5039034
## 10  November 4953619
## 11   October 5657164
## 12 September 7702333

RCFL$KWH[is.na(RCFL$KWH)] = median(RCFL$KWH, na.rm=TRUE)

summary(RCFL)

##   CaseSequence     YYYY-MMM              KWH          
##  Min.   :733.0   Length:192         Min.   :  770523  
##  1st Qu.:780.8   Class :character   1st Qu.: 5434539  
##  Median :828.5   Mode  :character   Median : 6283324  
##  Mean   :828.5                      Mean   : 6501333  
##  3rd Qu.:876.2                      3rd Qu.: 7608792  
##  Max.   :924.0                      Max.   :10655730

RCFL_ts <- ts(RCFL$KWH, start=c(1998,1), frequency = 12)
RCFL_ts

##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1998  6862583  5838198  5420658  5010364  4665377  6467147  8914755  8607428
## 1999  7183759  5759262  4847656  5306592  4426794  5500901  7444416  7564391
## 2000  7068296  5876083  4807961  4873080  5050891  7092865  6862662  7517830
## 2001  7538529  6602448  5779180  4835210  4787904  6283324  7855129  8450717
## 2002  7099063  6413429  5839514  5371604  5439166  5850383  7039702  8058748
## 2003  7256079  6190517  6120626  4885643  5296096  6051571  6900676  8476499
## 2004  7584596  6560742  6526586  4831688  4878262  6421614  7307931  7309774
## 2005  8225477  6564338  5581725  5563071  4453983  5900212  8337998  7786659
## 2006  7793358  5914945  5819734  5255988  4740588  7052275  7945564  8241110
## 2007  8031295  7928337  6443170  4841979  4862847  5022647  6426220  7447146
## 2008  7964293  7597060  6085644  5352359  4608528  6548439  7643987  8037137
## 2009  8072330  6976800  5691452  5531616  5264439  5804433  7713260  8350517
## 2010  9397357  8390677  7347915  5776131  4919289  6696292   770523  7922701
## 2011  8394747  8898062  6356903  5685227  5506308  8037779 10093343 10308076
## 2012  8991267  7952204  6356961  5569828  5783598  7926956  8886851  9612423
## 2013 10655730  7681798  6517514  6105359  5940475  7920627  8415321  9080226
##           Sep      Oct      Nov      Dec
## 1998  6989888  6345620  4640410  4693479
## 1999  7899368  5358314  4436269  4419229
## 2000  8912169  5844352  5041769  6220334
## 2001  7112069  5242535  4461979  5240995
## 2002  8245227  5865014  4908979  5779958
## 2003  7791791  5344613  4913707  5756193
## 2004  6690366  5444948  4824940  5791208
## 2005  7057213  6694523  4313019  6181548
## 2006  7296355  5104799  4458429  6226214
## 2007  7666970  5785964  4907057  6047292
## 2008  6283324  5101803  4555602  6442746
## 2009  7583146  5566075  5339890  7089880
## 2010  7819472  5875917  4800733  6152583
## 2011  8943599  5603920  6154138  8273142
## 2012  7559148  5576852  5731899  6609694
## 2013  7968220  5759367  5769083  9606304

ggtsdisplay(RCFL_ts, main="Monthly Power Consumption before transform")

BOXCOX Tansform:

RCFLS_BXCX <- RCFL_ts %>% BoxCox(lambda= 'auto')
ggtsdisplay(RCFLS_BXCX, main='MONTHLY POWER CONSUMER BXCX')

## Exploratory data analysis:

ggseasonplot(RCFLS_BXCX)

summary(RCFLS_BXCX)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.80   38.69   39.42   39.47   40.41   42.20

ggsubseriesplot(RCFLS_BXCX)

ggAcf(RCFLS_BXCX)

Box.test(RCFLS_BXCX, type = c("Ljung-Box"))

## 
##  Box-Ljung test
## 
## data:  RCFLS_BXCX
## X-squared = 16.556, df = 1, p-value = 4.723e-05

summary(RCFLS_BXCX)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.80   38.69   39.42   39.47   40.41   42.20

boxplot(RCFLS_BXCX~cycle(RCFLS_BXCX))

## Differencing:

print(paste0("Suggested # of diff: ", ndiffs(RCFLS_BXCX)))

## [1] "Suggested # of diff: 1"

print(paste0("DIFF REQUIRED (SEASIONAL): ", ndiffs(diff(RCFLS_BXCX, lag=12))))

## [1] "DIFF REQUIRED (SEASIONAL): 0"

RCFL_PWR_DIFF <- RCFLS_BXCX %>% diff(lag = 12)
ggtsdisplay(RCFL_PWR_DIFF, main= "Monthly power consumption BXCX AND DIFF")

ggseasonplot(RCFL_PWR_DIFF,polar = TRUE)+ggtitle('Residential Power Usage by Year')

plot(RCFL_PWR_DIFF)

autoplot(RCFL_PWR_DIFF, series="Data")+
  autolayer(ma(RCFL_PWR_DIFF, 12), series = "12 MTH Moving Avg")+ ggtitle("2014 MVING AVG")

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Forecast Models

1.

RCFLS_STL <- stlf(RCFL_PWR_DIFF, damped=FALSE, s.window = "periodic", robust=TRUE, h = 12)

# forecast plot

autoplot(RCFLS_STL) + autolayer(fitted(RCFLS_STL))

2.

RCFL_STL_DP <- stlf(RCFL_PWR_DIFF, damped=TRUE, s.window = "periodic", robust=TRUE, h = 12)

# forecast plot

autoplot(RCFL_STL_DP) + autolayer(fitted(RCFL_STL_DP))

3.

arima_model <- auto.arima(RCFL_PWR_DIFF)

# forecast values
arima_model <- forecast(arima_model, h=20)

# forecast plot
autoplot(arima_model) + autolayer(fitted(arima_model))

4.

RCFL_ETS<- ets(RCFL_PWR_DIFF)

# forecast plot
autoplot(forecast(RCFL_ETS
                , h=12)) + autolayer(fitted(RCFL_ETS
                                                    ))

## 5.

RCFL_FCST_PWR_S <- ses(RCFL_PWR_DIFF, h=12)
autoplot(RCFL_FCST_PWR_S)+
  autolayer(fitted(RCFL_FCST_PWR_S), series="Fitted")

## Models comparison:

accuracy(RCFLS_STL)

##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.0005455413 1.348221 0.5931651 -1360.432 1605.179 0.5404898
##                   ACF1
## Training set 0.1194029

checkresiduals(RCFLS_STL)

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,N,N)
## Q* = 65.794, df = 24, p-value = 9.303e-06
## 
## Model df: 0.   Total lags used: 24

summary(RCFLS_STL)

## 
## Forecast method: STL +  ETS(A,N,N)
## 
## Model Information:
## ETS(A,N,N) 
## 
## Call:
## ets(y = na.interp(x), model = etsmodel, damped = FALSE, allow.multiplicative.trend = allow.multiplicative.trend)
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 0.0713 
## 
##   sigma:  1.3558
## 
##      AIC     AICc      BIC 
## 1048.295 1048.432 1057.874 
## 
## Error measures:
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.0005455413 1.348221 0.5931651 -1360.432 1605.179 0.5404898
##                   ACF1
## Training set 0.1194029
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2014     0.17305758 -1.564437 1.910552 -2.484211 2.830326
## Feb 2014     0.06130592 -1.676189 1.798801 -2.595963 2.718575
## Mar 2014     0.12236178 -1.615133 1.859857 -2.534907 2.779631
## Apr 2014     0.08210098 -1.655394 1.819596 -2.575168 2.739370
## May 2014     0.12746350 -1.610031 1.864958 -2.529805 2.784732
## Jun 2014     0.14679543 -1.590699 1.884290 -2.510474 2.804064
## Jul 2014    -0.19208644 -1.929581 1.545408 -2.849355 2.465183
## Aug 2014    -0.02245609 -1.759951 1.715039 -2.679725 2.634813
## Sep 2014     0.16034813 -1.577147 1.897843 -2.496921 2.817617
## Oct 2014     0.05745299 -1.680042 1.794948 -2.599816 2.714722
## Nov 2014     0.08070004 -1.656795 1.818195 -2.576569 2.737969
## Dec 2014     0.05898262 -1.678512 1.796477 -2.598286 2.716252

accuracy(RCFL_STL_DP)

##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -0.007629573 1.349456 0.5947813 -1328.026 1577.668 0.5419625
##                  ACF1
## Training set 0.120852

checkresiduals(RCFL_STL_DP)

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,Ad,N)
## Q* = 65.735, df = 24, p-value = 9.492e-06
## 
## Model df: 0.   Total lags used: 24

summary(RCFL_STL_DP)

## 
## Forecast method: STL +  ETS(A,Ad,N)
## 
## Model Information:
## ETS(A,Ad,N) 
## 
## Call:
## ets(y = na.interp(x), model = etsmodel, damped = TRUE, allow.multiplicative.trend = allow.multiplicative.trend)
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
##     phi   = 0.9622 
## 
##   Initial states:
##     l = 0.1056 
##     b = -6e-04 
## 
##   sigma:  1.3686
## 
##      AIC     AICc      BIC 
## 1054.625 1055.110 1073.783 
## 
## Error measures:
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set -0.007629573 1.349456 0.5947813 -1328.026 1577.668 0.5419625
##                  ACF1
## Training set 0.120852
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2014     0.17900301 -1.574926 1.932932 -2.503400 2.861406
## Feb 2014     0.06761419 -1.686315 1.821544 -2.614789 2.750018
## Mar 2014     0.12901915 -1.624910 1.882949 -2.553385 2.811423
## Apr 2014     0.08909425 -1.664835 1.843024 -2.593310 2.771498
## May 2014     0.13477996 -1.619150 1.888710 -2.547624 2.817184
## Jun 2014     0.15442285 -1.599507 1.908353 -2.527982 2.836828
## Jul 2014    -0.18415983 -1.938091 1.569771 -2.866565 2.498246
## Aug 2014    -0.01424159 -1.768173 1.739690 -2.696648 2.668165
## Sep 2014     0.16883961 -1.585092 1.922772 -2.513568 2.851247
## Oct 2014     0.06621097 -1.687722 1.820144 -2.616198 2.748620
## Nov 2014     0.08971444 -1.664219 1.843648 -2.592696 2.772125
## Dec 2014     0.06824374 -1.685691 1.822179 -2.614168 2.750656

accuracy(arima_model)

##                       ME      RMSE       MAE       MPE     MAPE      MASE
## Training set -0.02554754 0.9666583 0.4541852 -1920.696 2160.894 0.4138519
##                     ACF1
## Training set 0.006469743

checkresiduals(arima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,1)(0,0,2)[12] with non-zero mean
## Q* = 6.8185, df = 21, p-value = 0.9985
## 
## Model df: 3.   Total lags used: 24

summary(arima_model)

## 
## Forecast method: ARIMA(0,0,1)(0,0,2)[12] with non-zero mean
## 
## Model Information:
## Series: RCFL_PWR_DIFF 
## ARIMA(0,0,1)(0,0,2)[12] with non-zero mean 
## 
## Coefficients:
##          ma1     sma1    sma2    mean
##       0.1195  -0.9264  0.0890  0.0616
## s.e.  0.0712   0.0810  0.0811  0.0198
## 
## sigma^2 = 0.9557:  log likelihood = -257.11
## AIC=524.23   AICc=524.57   BIC=540.19
## 
## Error measures:
##                       ME      RMSE       MAE       MPE     MAPE      MASE
## Training set -0.02554754 0.9666583 0.4541852 -1920.696 2160.894 0.4138519
##                     ACF1
## Training set 0.006469743
## 
## Forecasts:
##          Point Forecast     Lo 80      Hi 80     Lo 95     Hi 95
## Jan 2014    -0.71170867 -1.964897  0.5414796 -2.628295 1.2048774
## Feb 2014     0.20176196 -1.060338  1.4638615 -1.728453 2.1319766
## Mar 2014     0.10559912 -1.156500  1.3676986 -1.824616 2.0358137
## Apr 2014    -0.21986269 -1.481962  1.0422368 -2.150077 1.7103519
## May 2014    -0.33880539 -1.600905  0.9232941 -2.269020 1.5914092
## Jun 2014    -0.43704168 -1.699141  0.8250578 -2.367256 1.4931729
## Jul 2014    -1.07257955 -2.334679  0.1895200 -3.002794 0.8576351
## Aug 2014     0.06430436 -1.197795  1.3264039 -1.865910 1.9945190
## Sep 2014     0.14938779 -1.112712  1.4114873 -1.780827 2.0796024
## Oct 2014     0.22911025 -1.032989  1.4912097 -1.701104 2.1593249
## Nov 2014    -0.19797910 -1.460079  1.0641204 -2.128194 1.7322355
## Dec 2014    -1.54532103 -2.807419 -0.2832232 -3.475533 0.3848910
## Jan 2015    -0.02932622 -1.743669  1.6850167 -2.651187 2.5925349
## Feb 2015     0.05006537 -1.669873  1.7700036 -2.580353 2.6804839
## Mar 2015     0.05625769 -1.663681  1.7761959 -2.574161 2.6866762
## Apr 2015     0.08773329 -1.632205  1.8076715 -2.542685 2.7181518
## May 2015     0.10404245 -1.615896  1.8239807 -2.526376 2.7344610
## Jun 2015     0.11673378 -1.603204  1.8366720 -2.513685 2.7471523
## Jul 2015     0.18863975 -1.531298  1.9085780 -2.441779 2.8190583
## Aug 2015     0.06605819 -1.653880  1.7859964 -2.564360 2.6964767

accuracy(RCFL_ETS)

##                         ME     RMSE       MAE       MPE     MAPE     MASE
## Training set -1.229798e-05 1.346981 0.6002461 -768.1263 994.3076 0.546942
##                   ACF1
## Training set 0.1202904

checkresiduals(RCFL_ETS)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 66.674, df = 24, p-value = 6.893e-06
## 
## Model df: 0.   Total lags used: 24

summary(RCFL_ETS)

## ETS(A,N,N) 
## 
## Call:
## ets(y = RCFL_PWR_DIFF)
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 0.0719 
## 
##   sigma:  1.3545
## 
##      AIC     AICc      BIC 
## 1047.964 1048.100 1057.543 
## 
## Training set error measures:
##                         ME     RMSE       MAE       MPE     MAPE     MASE
## Training set -1.229798e-05 1.346981 0.6002461 -768.1263 994.3076 0.546942
##                   ACF1
## Training set 0.1202904

accuracy(RCFL_FCST_PWR_S)

##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.0002402688 1.346981 0.6002376 -765.1024 991.1566 0.5469343
##                   ACF1
## Training set 0.1202903

checkresiduals(RCFL_FCST_PWR_S)

## 
##  Ljung-Box test
## 
## data:  Residuals from Simple exponential smoothing
## Q* = 66.674, df = 24, p-value = 6.893e-06
## 
## Model df: 0.   Total lags used: 24

summary(RCFL_FCST_PWR_S)

## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
## ses(y = RCFL_PWR_DIFF, h = 12)
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 0.0716 
## 
##   sigma:  1.3545
## 
##      AIC     AICc      BIC 
## 1047.964 1048.100 1057.543 
## 
## Error measures:
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.0002402688 1.346981 0.6002376 -765.1024 991.1566 0.5469343
##                   ACF1
## Training set 0.1202903
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Feb 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Mar 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Apr 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## May 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Jun 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Jul 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Aug 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Sep 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726454
## Oct 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726455
## Nov 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726455
## Dec 2014     0.07162989 -1.664267 1.807526 -2.583195 2.726455

ARIMA has the best result. BIC dropped to 540. AIC it dropped to 524. RMSE has also dropped to 0.966.ARIMA is the best model , I’ll print the prediction using CSV doc.

rslts_2 <- forecast(arima_model, h=12)
rslts_fin <- data.frame(rslts_2)

write.csv(rslts_fin,"results_rcfl.csv", row.names = FALSE)