In part A, I want you to forecast how much cash is taken out of 4 different ATM machines for May 2010. The data is given in a single file. The variable ‘Cash’ is provided in hundreds of dollars; other than that it is straightforward. I am being somewhat ambiguous on purpose to make this have a little more business feeling. Explain and demonstrate your process, techniques used and not used, and your actual forecast. I am giving you data via an Excel file; please provide your written report on your findings, visuals, discussion and your R code via an RPubs link along with the actual .rmd file. Also please submit the forecast, which you will put in an Excel-readable file.
The first step is to look at the data and get a sense of what we are working with: summary statistics, missing values, and so on.
library(fpp2)
library(dplyr)
library(caret)
library(readxl)
library(tidyverse)
library(lubridate)
library(ggplot2)
library(TSstudio)
After loading the data we can see that some rows are missing both the ATM assignment and the cash amount, leaving only the date stamp. I decided to remove those rows; if at least the ATM label or the cash value had been present, I would have tried imputation. There are 14 missing ATM labels and 19 missing Cash values.
data <- read_xlsx("ATM624Data.xlsx" ,sheet="ATM Data") %>% mutate(DATE = as.Date(DATE, origin = '1899-12-30'))
summary(data)
## DATE ATM Cash
## Min. :2009-05-01 Length:1474 Min. : 0.0
## 1st Qu.:2009-08-01 Class :character 1st Qu.: 0.5
## Median :2009-11-01 Mode :character Median : 73.0
## Mean :2009-10-31 Mean : 155.6
## 3rd Qu.:2010-02-01 3rd Qu.: 114.0
## Max. :2010-05-14 Max. :10919.8
## NA's :19
colSums(is.na(data))
## DATE ATM Cash
## 0 14 19
df <- data[complete.cases(data),]
colSums(is.na(df))
## DATE ATM Cash
## 0 0 0
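For transparency, the rows being dropped can be inspected before removal; a quick check on the `data` object loaded above:
# Sketch: look at the incomplete rows before dropping them
data %>% filter(is.na(ATM) | is.na(Cash))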
As a first step I separated the data by ATM, which makes each series easier to manipulate and work with. I also split each series into training and test sets (80/20) so I can fit several forecasting models, compare their accuracy, and finally run the selected model against the test data to see how well it performs.
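The same filter/split/re-index pattern is repeated for each ATM below; a reusable helper could look like this (a sketch of the idea, not the code actually used):
# Sketch: one helper for the repeated per-ATM 80/20 split (weekly frequency)
split_atm <- function(df, atm_name, test_frac = 0.20) {
  y <- ts(dplyr::filter(df, ATM == atm_name)$Cash, frequency = 7)
  ts_split(ts.obj = y, sample.out = round(test_frac * length(y)))
}
# e.g. s1 <- split_atm(df, "ATM1"); s1$train; s1$test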
After splitting the data and plotting it, we can see that ATM1 and ATM2 look very similar, with weekly seasonality and some rises and falls.
ATM3 has few to almost no transactions.
ATM4 shows no clear trend or seasonality; it looks mostly stationary, with one very high spike in the data.
atm_1 <- df %>% filter(ATM == 'ATM1')
# start should be a single date, not the whole DATE vector
ts_atm1 <- ts(atm_1$Cash, start = decimal_date(as.Date(atm_1$DATE[1])), frequency = 365.25)
data_split_1 <- ts_split(ts.obj = ts_atm1, sample.out = round((0.20 * length(ts_atm1))))
atm1_train_1 <- ts(data_split_1$train, frequency = 7 ,start = 1)
atm1_test_1 <- ts(data_split_1$test,frequency = 7, start = 43)
autoplot(atm1_train_1)
ggtsdisplay(atm1_train_1)
atm_2 <- df %>% filter(ATM == 'ATM2')
ts_atm2 <- ts(atm_2$Cash, start = decimal_date(as.Date(atm_2$DATE[1])), frequency = 365.25)
data_split_2 <- ts_split(ts.obj = ts_atm2, sample.out = round((0.20 * length(ts_atm2))))
atm2_train_2 <- ts(data_split_2$train, frequency = 7,start = 1)
atm2_test_2 <- ts(data_split_2$test,frequency = 7,start = 43)
ggtsdisplay(atm2_train_2)
atm_3 <- df %>% filter(ATM == 'ATM3')
ts_atm3 <- ts(atm_3$Cash, start = decimal_date(as.Date(atm_3$DATE[1])), frequency = 365.25)
data_split_3 <- ts_split(ts.obj = ts_atm3, sample.out = round((0.20 * length(ts_atm3))))
atm3_train_3 <- ts(data_split_3$train, frequency = 7,start = 1)
atm3_test_3 <- ts(data_split_3$test,frequency = 7,start = 43)
autoplot(atm3_test_3)
atm_4 <- df %>% filter(ATM == 'ATM4')
ts_atm4 <- ts(atm_4$Cash, start = decimal_date(as.Date(atm_4$DATE[1])), frequency = 365.25)
data_split_4 <- ts_split(ts.obj = ts_atm4, sample.out = round((0.20 * length(ts_atm4))))
atm4_train_4 <- ts(data_split_4$train, frequency = 7,start = 1)
atm4_test_4 <- ts(data_split_4$test,frequency = 7,start = 43)
autoplot(ts_atm4)
ggtsdisplay(atm4_train_4)
For the modeling section I will fit several models and compare them at the end. For accuracy I will be guided by the RMSE, and the selected model will then be used to produce the final forecast.
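Since the comparison below is driven by RMSE, one way to keep it readable is to collect the test-set RMSE of each candidate into a single table. A sketch, assuming the model objects fitted in the sections that follow:
# Sketch: pull the test-set RMSE for several fitted models into one named vector
rmse_table <- function(models, test_set) {
  sapply(models, function(m) accuracy(forecast(m), test_set)["Test set", "RMSE"])
}
# e.g. rmse_table(list(ets_boxcox = atm1_ets_boxcox, ets = atm1_ets,
#                      arima = atm1_arima), atm1_test_1)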
atm1_ets_boxcox <- ets(atm1_train_1, lambda = BoxCox.lambda(atm1_train_1))
checkresiduals(atm1_ets_boxcox)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 21.318, df = 5, p-value = 0.0007052
##
## Model df: 9. Total lags used: 14
summary(atm1_ets_boxcox)
## ETS(A,N,A)
##
## Call:
## ets(y = atm1_train_1, lambda = BoxCox.lambda(atm1_train_1))
##
## Box-Cox transformation: lambda= 0.4456
##
## Smoothing parameters:
## alpha = 0.0047
## gamma = 0.3462
##
## Initial states:
## l = 13.1244
## s = -7.463 0.9269 1.5747 0.1021 1.1763 1.9437
## 1.7392
##
## sigma: 2.8085
##
## AIC AICc BIC
## 2254.066 2254.854 2290.764
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.252696 26.86749 17.67147 -80.69129 100.2416 0.9086164 0.124898
atm1_hw <- hw(atm1_train_1, h = 30, lambda = BoxCox.lambda(atm1_train_1))
checkresiduals(atm1_hw)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 22.778, df = 3, p-value = 4.493e-05
##
## Model df: 11. Total lags used: 14
summary(atm1_hw)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = atm1_train_1, h = 30, lambda = BoxCox.lambda(atm1_train_1))
##
## Box-Cox transformation: lambda= 0.4456
##
## Smoothing parameters:
## alpha = 0.0094
## beta = 1e-04
## gamma = 0.3271
##
## Initial states:
## l = 14.7484
## b = -0.0023
## s = 0.0816 1.2889 0.029 -5.3729 1.4085 3.3285
## -0.7635
##
## sigma: 2.9733
##
## AIC AICc BIC
## 2289.055 2290.181 2333.093
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.193244 28.09913 19.11224 -85.87815 106.5143 0.9826969 0.1204975
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 42.42857 25.22821 7.930497 53.92992 2.941563 74.08995
## 42.57143 113.95563 67.950729 173.42321 48.799738 210.57217
## 42.71429 93.05019 52.480459 146.78865 36.068182 180.80030
## 42.85714 67.64372 34.453768 113.55146 21.731170 143.25984
## 43.00000 72.77913 38.010645 120.36907 24.502677 151.00412
## 43.14286 108.12330 63.581162 166.05606 45.170308 202.36716
## 43.28571 84.57192 46.349692 135.83088 31.116728 168.48378
## 43.42857 25.10833 7.218581 55.69406 2.352736 77.43506
## 43.57143 113.67890 65.570900 176.78222 45.881933 216.51435
## 43.71429 92.80289 50.420524 149.85260 33.606502 186.26437
## 43.85714 67.43651 32.826077 116.21100 19.881384 148.06725
## 44.00000 72.56333 36.290739 123.11576 22.522940 155.95325
## 44.14286 107.85452 61.287481 169.33664 42.375767 208.18253
## 44.28571 84.33738 44.427532 138.76741 28.850922 173.74135
## 44.42857 24.98877 6.570942 57.39862 1.856558 80.69393
## 44.57143 113.40254 63.334137 180.00534 43.182357 222.25326
## 44.71429 92.55594 48.489100 152.79494 31.340391 191.54705
## 44.85714 67.22965 31.306962 118.76842 18.195939 152.72312
## 45.00000 72.34789 34.683689 125.75617 20.714277 160.74448
## 45.14286 107.58611 59.132935 172.48524 39.793360 213.80064
## 45.28571 84.10320 42.627552 141.58859 26.770808 178.82713
## 45.42857 24.86952 5.979242 59.05199 1.440103 83.87929
## 45.57143 113.12655 61.221608 183.11150 40.672284 227.81779
## 45.71429 92.30937 46.669325 155.63262 29.244129 196.67421
## 45.85714 67.02314 29.882245 121.23797 16.653203 157.24920
## 46.00000 72.13280 33.174744 128.30513 19.054186 165.40044
## 46.14286 107.31806 57.099229 175.52025 37.395178 219.24956
## 46.28571 83.86938 40.933744 144.31050 24.851972 183.76567
## 46.42857 24.75059 5.436844 60.66081 1.092994 87.00127
## 46.57143 112.85094 59.218351 186.11572 38.328895 233.23092
atm1_ets <- ets(atm1_train_1, model = "ZZZ")
checkresiduals(atm1_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 12.935, df = 5, p-value = 0.024
##
## Model df: 9. Total lags used: 14
summary(atm1_ets)
## ETS(A,N,A)
##
## Call:
## ets(y = atm1_train_1, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.0124
## gamma = 0.3265
##
## Initial states:
## l = 83.4839
## s = -39.541 -5.2 30.3704 -1.0723 5.45 28.4926
## -18.4997
##
## sigma: 26.5863
##
## AIC AICc BIC
## 3557.752 3558.540 3594.450
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.07198656 26.17046 17.37164 -87.8154 104.3711 0.8932004 0.1040627
atm1_arima <- auto.arima(atm1_train_1,seasonal = TRUE, stepwise = FALSE)
checkresiduals(atm1_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(0,1,1)[7]
## Q* = 9.1647, df = 12, p-value = 0.6888
##
## Model df: 2. Total lags used: 14
summary(atm1_arima)
## Series: atm1_train_1
## ARIMA(0,0,1)(0,1,1)[7]
##
## Coefficients:
## ma1 sma1
## 0.1406 -0.6563
## s.e. 0.0607 0.0533
##
## sigma^2 estimated as 679.1: log likelihood=-1325.23
## AIC=2656.45 AICc=2656.54 BIC=2667.39
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3962813 25.65191 16.47634 -81.61352 98.40314 0.8471662
## ACF1
## Training set 0.00293361
accuracy(forecast(atm1_ets_boxcox), atm1_test_1)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.252696 26.86749 17.67147 -80.69129 100.2416 0.9086164
## Test set 9.510868 49.38465 40.21452 -912.83143 956.9561 2.0677162
## ACF1 Theil's U
## Training set 0.1248980 NA
## Test set -0.1259892 0.4054986
accuracy(forecast(atm1_hw), atm1_test_1)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.193244 28.09913 19.11224 -85.87815 106.5143 0.9826969
## Test set 7.510744 57.56973 46.49981 -513.54490 559.2138 2.3908875
## ACF1 Theil's U
## Training set 0.1204975 NA
## Test set -0.1850659 0.4012037
accuracy(forecast(atm1_ets), atm1_test_1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.07198656 26.17046 17.37164 -87.8154 104.3711 0.8932004
## Test set 5.92157085 48.08707 37.47349 -925.6206 963.7496 1.9267799
## ACF1 Theil's U
## Training set 0.1040627 NA
## Test set -0.1247971 0.333755
accuracy(forecast(atm1_arima), atm1_test_1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3962813 25.65191 16.47634 -81.61352 98.40314 0.8471662
## Test set 5.8371752 47.41529 36.96936 -921.75305 959.25575 1.9008590
## ACF1 Theil's U
## Training set 0.00293361 NA
## Test set -0.12271852 0.3514678
atm2_ets_boxcox <- ets(atm2_train_2, lambda = BoxCox.lambda(atm2_train_2))
checkresiduals(atm2_ets_boxcox)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 22.49, df = 5, p-value = 0.0004225
##
## Model df: 9. Total lags used: 14
summary(atm2_ets_boxcox)
## ETS(A,N,A)
##
## Call:
## ets(y = atm2_train_2, lambda = BoxCox.lambda(atm2_train_2))
##
## Box-Cox transformation: lambda= 0.4924
##
## Smoothing parameters:
## alpha = 1e-04
## gamma = 0.3659
##
## Initial states:
## l = 14.1658
## s = 0.9831 -1.0513 -1.5 -0.6218 3.8044 -0.9767
## -0.6377
##
## sigma: 3.8387
##
## AIC AICc BIC
## 2435.302 2436.090 2472.001
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.8341238 28.02926 19.68439 -Inf Inf 0.8950325 0.007075576
atm2_hw <- hw(atm2_train_2, h = 30, lambda = BoxCox.lambda(atm2_train_2))
checkresiduals(atm2_hw)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 21.16, df = 3, p-value = 9.752e-05
##
## Model df: 11. Total lags used: 14
summary(atm2_hw)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = atm2_train_2, h = 30, lambda = BoxCox.lambda(atm2_train_2))
##
## Box-Cox transformation: lambda= 0.4924
##
## Smoothing parameters:
## alpha = 2e-04
## beta = 2e-04
## gamma = 0.4057
##
## Initial states:
## l = 14.3336
## b = -0.0142
## s = 1.3234 2.4768 -6.2433 -2.8536 3.554 1.843
## -0.1003
##
## sigma: 3.924
##
## AIC AICc BIC
## 2449.976 2451.103 2494.015
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.386938 28.62687 19.73019 -Inf Inf 0.8971148 0.000950284
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 42.42857 22.565809 4.7902647 53.79434 0.72217720 75.83760
## 42.57143 3.176661 -0.4978505 18.82536 -4.17065880 32.52425
## 42.71429 84.410181 43.4638255 139.09096 27.30180342 173.63408
## 42.85714 50.505846 20.4680818 94.16893 10.02087392 122.84489
## 43.00000 22.992293 4.9855009 54.45613 0.79806044 76.62507
## 43.14286 50.402898 20.4029982 94.02770 9.97559184 122.68327
## 43.28571 59.977296 26.6025003 107.01397 14.40946329 137.48723
## 43.42857 22.047369 3.7327605 56.03470 0.24231591 80.45429
## 43.57143 2.986871 -0.9190641 20.14934 -5.79095941 35.55149
## 43.71429 83.394478 40.0885582 142.70571 23.58368399 180.62891
## 43.85714 49.723942 18.1862993 97.13852 7.83836701 128.72392
## 44.00000 22.468874 3.9049634 56.71046 0.28667743 81.26597
## 44.14286 49.621807 18.1249521 96.99518 7.79834271 128.55866
## 44.28571 59.123832 23.9863830 110.18141 11.75819709 143.70853
## 44.42857 21.535045 2.8522434 58.11948 0.02499838 84.83512
## 44.57143 2.803012 -1.4398589 21.39734 -7.57120355 38.46488
## 44.71429 82.385016 37.0119611 146.04531 20.32202606 187.20486
## 44.85714 48.948231 16.1463730 99.88920 6.02435256 134.27018
## 45.00000 21.951572 3.0026414 58.80815 0.04022355 85.66944
## 45.14286 48.846908 16.0885377 99.74403 5.98926062 134.10173
## 45.28571 58.276576 21.6295000 113.11288 9.50248656 149.57097
## 45.42857 21.028834 2.1221179 60.07533 -0.02315056 89.02087
## 45.57143 2.625080 -2.0489735 22.58141 -9.49314987 41.28284
## 45.71429 81.381794 34.1856239 149.15833 17.44283517 193.43631
## 45.85714 48.178710 14.3101965 102.45924 4.52046043 139.54224
## 46.00000 21.440386 2.2517403 60.77604 -0.01208811 89.87656
## 46.14286 48.078199 14.2556947 102.31248 4.49004456 139.37098
## 46.28571 57.435528 19.4906115 115.84977 7.57923548 155.13793
## 46.42857 20.528735 1.5227010 61.92193 -0.20897724 93.04170
## 46.57143 2.453069 -2.7379339 23.71054 -11.54317533 44.01915
accuracy(forecast(atm2_hw), atm2_test_2)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.386938 28.62687 19.73019 -Inf Inf 0.8971148
## Test set 21.821679 64.11442 58.57748 -311.0392 396.5721 2.6634683
## ACF1 Theil's U
## Training set 0.000950284 NA
## Test set -0.096850736 0.8186214
atm2_ets <- ets(atm2_train_2, model = "ZZZ")
checkresiduals(atm2_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 22.325, df = 5, p-value = 0.000454
##
## Model df: 9. Total lags used: 14
summary(atm2_ets)
## ETS(A,N,A)
##
## Call:
## ets(y = atm2_train_2, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 1e-04
## gamma = 0.3885
##
## Initial states:
## l = 80.2896
## s = -8.8155 32.0733 -17.9916 -31.974 16.389 9.229
## 1.0898
##
## sigma: 28.1595
##
## AIC AICc BIC
## 3591.096 3591.884 3627.794
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -2.164792 27.71907 19.27427 -Inf Inf 0.8763845 0.00547008
atm2_arima <- auto.arima(atm2_train_2,seasonal = TRUE, stepwise = FALSE)
checkresiduals(atm2_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0)(0,1,1)[7]
## Q* = 16.427, df = 13, p-value = 0.2268
##
## Model df: 1. Total lags used: 14
accuracy(forecast(atm2_ets_boxcox), atm2_test_2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.8341238 28.02926 19.68439 -Inf Inf 0.8950325
## Test set 16.7474223 62.23748 59.06910 -297.4056 377.5899 2.6858218
## ACF1 Theil's U
## Training set 0.007075576 NA
## Test set -0.067262347 0.8075132
accuracy(forecast(atm2_hw), atm2_test_2)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.386938 28.62687 19.73019 -Inf Inf 0.8971148
## Test set 21.821679 64.11442 58.57748 -311.0392 396.5721 2.6634683
## ACF1 Theil's U
## Training set 0.000950284 NA
## Test set -0.096850736 0.8186214
accuracy(forecast(atm2_ets), atm2_test_2)
## ME RMSE MAE MPE MAPE MASE
## Training set -2.164792 27.71907 19.27427 -Inf Inf 0.8763845
## Test set 15.453119 61.42927 58.24648 -300.6917 378.6604 2.6484183
## ACF1 Theil's U
## Training set 0.00547008 NA
## Test set -0.06875717 0.7847923
accuracy(forecast(atm2_arima), atm2_test_2)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.39291 26.36780 17.81162 -Inf Inf 0.8098791
## Test set 15.02879 61.29706 58.07543 -302.4333 379.773 2.6406405
## ACF1 Theil's U
## Training set -0.003737894 NA
## Test set -0.069350477 0.779194
atm3_mean <- meanf(atm3_train_3,h=5)
summary(atm3_mean)
##
## Forecast method: Mean
##
## Model Information:
## $mu
## [1] 0
##
## $mu.se
## [1] 0
##
## $sd
## [1] 0
##
## $bootstrap
## [1] FALSE
##
## $call
## meanf(y = atm3_train_3, h = 5)
##
## attr(,"class")
## [1] "meanf"
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0 0 0 NaN NaN NaN NaN
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 42.71429 0 0 0 0 0
## 42.85714 0 0 0 0 0
## 43.00000 0 0 0 0 0
## 43.14286 0 0 0 0 0
## 43.28571 0 0 0 0 0
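The all-zero forecast makes sense once we check how much of the ATM3 history is non-zero; a quick check using the `atm_3` data frame from the split step:
# Sketch: list the non-zero observations in the ATM3 series (there are very few,
# all near the end of the sample, which is why the training mean is exactly zero)
atm_3 %>% filter(Cash > 0)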
atm4_ets_boxcox <- ets(atm4_train_4, lambda = BoxCox.lambda(atm4_train_4))
checkresiduals(atm4_ets_boxcox)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 26.306, df = 5, p-value = 7.782e-05
##
## Model df: 9. Total lags used: 14
summary(atm4_ets_boxcox)
## ETS(A,N,A)
##
## Call:
## ets(y = atm4_train_4, lambda = BoxCox.lambda(atm4_train_4))
##
## Box-Cox transformation: lambda= -0.1746
##
## Smoothing parameters:
## alpha = 0.058
## gamma = 1e-04
##
## Initial states:
## l = 3.4035
## s = -0.7293 0.0304 0.1361 0.1802 0.1332 0.0931
## 0.1563
##
## sigma: 0.5354
##
## AIC AICc BIC
## 1303.624 1304.407 1340.391
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 215.4884 725.234 316.5507 -174.6482 241.8772 0.7623691 -0.02904169
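Note the lambda of -0.1746 here: a value near zero means the Box-Cox transform behaves much like a log, which tames ATM4's large spike. A quick sketch to see what the ETS model is actually fitting (BoxCox() comes with the forecast package loaded via fpp2):
# Sketch: visualize the Box-Cox transformed ATM4 training series
autoplot(BoxCox(atm4_train_4, BoxCox.lambda(atm4_train_4)))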
atm4_hw <- hw(atm4_train_4, h = 30, lambda = BoxCox.lambda(atm4_train_4))
checkresiduals(atm4_hw)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 25.614, df = 3, p-value = 1.149e-05
##
## Model df: 11. Total lags used: 14
summary(atm4_hw)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = atm4_train_4, h = 30, lambda = BoxCox.lambda(atm4_train_4))
##
## Box-Cox transformation: lambda= -0.1746
##
## Smoothing parameters:
## alpha = 0.1122
## beta = 1e-04
## gamma = 1e-04
##
## Initial states:
## l = 3.6281
## b = -2e-04
## s = -0.7606 0.0391 0.1621 0.1593 0.1302 0.1124
## 0.1575
##
## sigma: 0.5388
##
## AIC AICc BIC
## 1309.234 1310.353 1353.355
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 192.8838 720.199 312.0052 -178.6733 242.3009 0.7514221 -0.03153323
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 42.71429 330.43619 64.00342 3328.3477 31.476823 19104.5790
## 42.85714 51.29260 14.87576 249.3284 8.489441 716.4556
## 43.00000 461.93341 80.63590 5758.0862 38.251515 41944.8559
## 43.14286 405.26627 72.53429 4794.3025 34.705134 32882.5928
## 43.28571 426.31896 74.62056 5283.9673 35.430835 38237.2866
## 43.42857 463.74094 78.66616 6143.0171 36.953948 48067.7059
## 43.57143 467.21559 78.38115 6338.2020 36.681467 51027.5217
## 43.71429 329.41244 60.03608 3753.5878 28.905526 24674.6739
## 43.85714 51.17774 14.15792 268.9750 7.934412 825.9215
## 44.00000 460.41622 75.48046 6565.8444 35.049375 56161.2648
## 44.14286 403.96522 67.99608 5441.0550 31.857366 43430.1566
## 44.28571 424.93819 69.94728 6006.6098 32.524571 50825.9873
## 44.42857 462.21678 73.71418 7003.4084 33.912906 64576.6978
## 44.57143 465.67801 73.46829 7227.1965 33.677666 68685.4874
## 44.71429 328.39241 56.45122 4228.6427 26.638669 31997.2730
## 44.85714 51.06319 13.49813 289.8147 7.434956 951.5403
## 45.00000 458.90488 70.82974 7480.8800 32.231420 75706.7569
## 45.14286 402.66906 63.89384 6169.9131 29.345200 57698.1626
## 45.28571 423.56266 65.72181 6822.9077 29.959820 67995.9334
## 45.42857 460.69850 69.23695 7979.0411 31.229197 87398.5677
## 45.57143 464.14636 69.02348 8235.8691 31.024609 93163.9458
## 45.71429 327.37609 53.19505 4760.1936 24.627200 41700.6470
## 45.85714 50.94895 12.88902 311.9484 6.983084 1095.9967
## 46.00000 457.39937 66.61257 8519.3064 29.735506 102884.2257
## 46.14286 401.37779 60.16704 6992.7015 27.115173 77194.4216
## 46.28571 422.19236 61.88227 7746.6053 27.682430 91667.5304
## 46.42857 459.18607 65.16910 9087.3530 28.846306 119324.5349
## 46.57143 462.62064 64.98274 9382.4134 28.667208 127512.7654
## 46.71429 326.36346 50.22383 5355.8939 22.832111 54671.7560
## 46.85714 50.83500 12.32452 335.4835 6.572322 1262.4718
accuracy(forecast(atm4_hw), atm4_test_4)
## ME RMSE MAE MPE MAPE MASE
## Training set 192.8838 720.1990 312.0052 -178.6733 242.3009 0.7514221
## Test set 126.3467 339.1838 261.9357 -191.5877 236.9195 0.6308365
## ACF1 Theil's U
## Training set -0.03153323 NA
## Test set -0.10969069 0.5161387
atm4_ets <- ets(atm4_train_4, model = "ZZZ")
checkresiduals(atm4_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,M)
## Q* = 6.4729, df = 3, p-value = 0.09074
##
## Model df: 11. Total lags used: 14
summary(atm4_ets)
## ETS(M,A,M)
##
## Call:
## ets(y = atm4_train_4, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.0687
## beta = 1e-04
## gamma = 1e-04
##
## Initial states:
## l = 591.3649
## b = 8.1463
## s = 0.1177 0.9225 2.2294 0.9748 0.8182 0.7602
## 1.1771
##
## sigma: 0.8218
##
## AIC AICc BIC
## 5152.496 5153.614 5196.617
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -133.8603 734.2711 367.6871 -517.1737 539.2188 0.8855243
## ACF1
## Training set -0.0110701
atm4_arima <- auto.arima(atm4_train_4,seasonal = TRUE, stepwise = FALSE)
checkresiduals(atm4_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 4.2431, df = 13, p-value = 0.9883
##
## Model df: 1. Total lags used: 14
accuracy(forecast(atm4_ets_boxcox), atm4_test_4)
## ME RMSE MAE MPE MAPE MASE
## Training set 215.4884 725.234 316.5507 -174.64822 241.8772 0.7623691
## Test set 195.5207 440.006 346.1958 -48.72414 104.9477 0.8337653
## ACF1 Theil's U
## Training set -0.02904169 NA
## Test set -0.12234133 0.6723007
accuracy(forecast(atm4_hw), atm4_test_4)
## ME RMSE MAE MPE MAPE MASE
## Training set 192.8838 720.1990 312.0052 -178.6733 242.3009 0.7514221
## Test set 126.3467 339.1838 261.9357 -191.5877 236.9195 0.6308365
## ACF1 Theil's U
## Training set -0.03153323 NA
## Test set -0.10969069 0.5161387
accuracy(forecast(atm4_ets), atm4_test_4)
## ME RMSE MAE MPE MAPE MASE
## Training set -133.8603 734.2711 367.6871 -517.1737 539.2188 0.8855243
## Test set -375.6622 660.7890 520.0316 -236.8685 250.5239 1.2524253
## ACF1 Theil's U
## Training set -0.0110701 NA
## Test set 0.0259293 3.100936
accuracy(forecast(atm4_arima), atm4_test_4)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.476213e-10 709.3400 339.3748 -584.4829 614.4673 0.8173381
## Test set 7.907664e+01 413.2063 350.1456 -435.6746 477.3959 0.8432779
## ACF1 Theil's U
## Training set -0.01204745 NA
## Test set -0.01326018 0.9031432
After running a couple of models on ATM1, ATM2, and ATM4, we see that ARIMA works best for ATM1 and ATM2, taking the test-set RMSE as the reference. In the case of ATM3 we do not have enough data points: the training portion of the series is all zeros, so even a simple meanf() model can only return a forecast of zero. For ATM4, the Holt-Winters method gives the lowest test-set RMSE.
In conclusion, the selected model for ATM1 and ATM2 is ARIMA, for ATM4 it is Holt-Winters, and for ATM3 the zero mean forecast is the only defensible choice.
# May 2010 has 31 days, so forecast 31 steps ahead (the default h would be 14 here)
atm1csv <- forecast(atm1_arima, h = 31) %>% as.data.frame()
atm2csv <- forecast(atm2_arima, h = 31) %>% as.data.frame()
atm4csv <- forecast(atm4_hw) %>% as.data.frame()  # hw() above was already fitted with h = 30
write.csv(atm1csv, "ATM1.csv")
write.csv(atm2csv, "ATM2.csv")
write.csv(atm4csv, "ATM4.csv")
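The files above keep each ATM separate; a single Excel-readable file with dates is another option. A sketch, reusing the models fitted on the training sets above (the file name is hypothetical, and ATM3, whose history is essentially all zeros, is written out as zero):
# Sketch: one May 2010 file covering all four ATMs
may_dates <- seq(as.Date("2010-05-01"), as.Date("2010-05-31"), by = "day")
atm4_may  <- hw(atm4_train_4, h = 31, lambda = BoxCox.lambda(atm4_train_4))
forecast_out <- data.frame(
  DATE = may_dates,
  ATM1 = as.numeric(forecast(atm1_arima, h = 31)$mean),
  ATM2 = as.numeric(forecast(atm2_arima, h = 31)$mean),
  ATM3 = 0,
  ATM4 = as.numeric(atm4_may$mean)
)
write.csv(forecast_out, "ATM_forecast_May2010.csv", row.names = FALSE)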
Part B consists of a simple dataset of residential power usage for January 1998 until December 2013. Your assignment is to model these data and provide a monthly forecast for 2014. The data is given in a single file. The variable ‘KWH’ is power consumption in kilowatt hours; the rest is straightforward. Add this to your existing files above.
data <- read_xlsx("ResidentialCustomerForecastLoad-624.xlsx" )
summary(data)
## CaseSequence YYYY-MMM KWH
## Min. :733.0 Length:192 Min. : 770523
## 1st Qu.:780.8 Class :character 1st Qu.: 5429912
## Median :828.5 Mode :character Median : 6283324
## Mean :828.5 Mean : 6502475
## 3rd Qu.:876.2 3rd Qu.: 7620524
## Max. :924.0 Max. :10655730
## NA's :1
colSums(is.na(data))
## CaseSequence YYYY-MMM KWH
## 0 0 1
df <- data[complete.cases(data),]
colSums(is.na(df))
## CaseSequence YYYY-MMM KWH
## 0 0 0
data
## # A tibble: 192 x 3
## CaseSequence `YYYY-MMM` KWH
## <dbl> <chr> <dbl>
## 1 733 1998-Jan 6862583
## 2 734 1998-Feb 5838198
## 3 735 1998-Mar 5420658
## 4 736 1998-Apr 5010364
## 5 737 1998-May 4665377
## 6 738 1998-Jun 6467147
## 7 739 1998-Jul 8914755
## 8 740 1998-Aug 8607428
## 9 741 1998-Sep 6989888
## 10 742 1998-Oct 6345620
## # … with 182 more rows
resi_ts <- ts(df$KWH,frequency = 12)
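One caveat: dropping the single NA row shifts every later observation back one month in a frequency-12 series. An alternative sketch keeps all 192 rows, sets the true start date, and interpolates the missing value instead (na.interp() is in the forecast package):
# Sketch: preserve calendar alignment by interpolating the one missing KWH value
resi_ts_full <- na.interp(ts(data$KWH, start = c(1998, 1), frequency = 12))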
autoplot(resi_ts)
ggtsdisplay(resi_ts)
The data appear seasonal, and the ACF shows strong correlation between observations. I am going to apply a couple of models in order to find the best fit for forecasting.
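Before fitting models, a season plot can make the monthly pattern explicit (ggseasonplot ships with forecast/fpp2):
# Sketch: overlay the years to inspect the seasonal shape of power usage
ggseasonplot(resi_ts)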
resi_ets <- ets(resi_ts, lambda = BoxCox.lambda(resi_ts))
checkresiduals(resi_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 11.05, df = 10, p-value = 0.3537
##
## Model df: 14. Total lags used: 24
summary(resi_ets)
## ETS(A,N,A)
##
## Call:
## ets(y = resi_ts, lambda = BoxCox.lambda(resi_ts))
##
## Box-Cox transformation: lambda= 0.0636
##
## Smoothing parameters:
## alpha = 0.0296
## gamma = 0.183
##
## Initial states:
## l = 26.7261
## s = -0.1972 -0.636 -0.2147 0.5037 0.7337 0.6417
## -0.0319 -0.5635 -0.5283 -0.2714 0.0953 0.4686
##
## sigma: 0.5832
##
## AIC AICc BIC
## 812.6801 815.4230 861.4642
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 198766.3 1089557 699781.3 -2.018235 13.66584 0.9404423 0.2675912
resi_hw<- hw(resi_ts, h = 30, lambda = BoxCox.lambda(resi_ts))
checkresiduals(resi_hw)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 11.079, df = 8, p-value = 0.1972
##
## Model df: 16. Total lags used: 24
summary(resi_hw)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = resi_ts, h = 30, lambda = BoxCox.lambda(resi_ts))
##
## Box-Cox transformation: lambda= 0.0636
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
## gamma = 0.1563
##
## Initial states:
## l = 26.6118
## b = 0.0025
## s = -0.1048 -0.5347 -0.2419 0.38 0.7336 0.5937
## -0.0297 -0.4267 -0.5952 -0.215 0.0272 0.4137
##
## sigma: 0.5826
##
## AIC AICc BIC
## 814.1262 817.6638 869.4149
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 111216.5 1082908 695238.3 -3.546315 13.76865 0.934337 0.290071
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 16 8032112 6107457 10513838 5272692 12102073
## Jan 17 8272870 6293788 10823559 5435088 12455361
## Feb 17 6919521 5247625 9080556 4523897 10465998
## Mar 17 6158883 4661091 8098571 4013723 9343836
## Apr 17 5674042 4287850 7471633 3689368 8626807
## May 17 6571693 4979271 8631739 4290412 9953247
## Jun 17 6448716 4884448 8472966 4207937 9771805
## Jul 17 8866309 6753445 11586385 5835877 13325139
## Aug 17 8426745 6412922 11021439 5538939 12681034
## Sep 17 6583503 4988376 8646987 4298332 9970673
## Oct 17 5888539 4452908 7749102 3832776 8944213
## Nov 17 6760867 5125188 8875891 4417354 10232210
## Dec 17 8126915 6159780 10670897 5308608 12302469
## Jan 18 8370331 6347602 10984947 5472036 12661232
## Feb 18 7001975 5293017 9217489 4555011 10640933
## Mar 18 6232821 4701714 8221597 4041537 9501159
## Apr 18 5742519 4325419 7585725 3715071 8772808
## May 18 6650261 5022480 8762336 4320013 10120165
## Jun 18 6525907 4926884 8601317 4237002 9935879
## Jul 18 8970299 6810915 11758421 5875350 13544480
## Aug 18 8525901 6467671 11185617 5576524 12890446
## Sep 18 6662203 5031650 8777816 4327971 10137893
## Oct 18 5959435 4491815 7867178 3859395 9095273
## Nov 18 6841550 5169566 9009969 4447753 10403549
## Dec 18 8222766 6212430 10830306 5344630 12506240
## Jan 19 8468865 6401750 11148749 5509090 12870571
## Feb 19 7085349 5338692 9356495 4586214 10818851
## Mar 19 6307591 4742591 8346502 4069430 9661189
## Apr 19 5811769 4363222 7701571 3740845 8921337
## May 19 6729708 5065955 8894927 4349691 10289955
resi_arima <- auto.arima(resi_ts,seasonal = TRUE, stepwise = FALSE)
checkresiduals(resi_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1)(0,1,2)[12] with drift
## Q* = 17.494, df = 20, p-value = 0.6207
##
## Model df: 4. Total lags used: 24
accuracy(forecast(resi_ets))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 198766.3 1089557 699781.3 -2.018235 13.66584 0.9404423 0.2675912
accuracy(forecast(resi_hw))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 111216.5 1082908 695238.3 -3.546315 13.76865 0.934337 0.290071
accuracy(forecast(resi_arima))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -10349.99 927823.9 576140 -5.029934 12.4629 0.7742797 -0.002808329
write.csv(forecast(resi_arima, h = 12), "residential-result.csv")  # 12 monthly steps cover calendar 2014
After applying a couple of modeling methods, we see that the best fit for this strongly seasonal data is the ARIMA model, which gives the best accuracy by the RMSE measure.
Part C consists of two data sets. These are simple two-column sets; however, they have different time stamps. Your optional assignment is to time-base sequence the data and aggregate based on hour (an example of what this looks like follows). Note: for multiple recordings within an hour, take the mean. Then determine whether the data is stationary and whether it can be forecast. If so, provide a week-forward forecast and present results via RPubs and .rmd, with the forecast in an Excel-readable file.
data_1 <- read_xlsx("Waterflow_Pipe1.xlsx" , col_types = c('date', 'numeric')) %>% mutate(time =format(`Date Time`,"%H:%M:%S"),date = format(`Date Time`,"%d/%m/%Y"))
data_2 <- read_xlsx("Waterflow_Pipe2.xlsx" , col_types = c('date', 'numeric'))
df_1 <- ts(data_1$WaterFlow, frequency = 60)
df_2 <- ts(data_2$WaterFlow,frequency = 24)
autoplot(df_1)
autoplot(df_2)
data_1$`Date Time` <- cut(as.POSIXct(paste(data_1$date, data_1$time),
                                     format="%d/%m/%Y %H:%M:%S"), breaks="hour")
data_1_new <- aggregate(WaterFlow ~`Date Time`, data_1, mean)
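For reference, the same hourly aggregation can be written with lubridate and dplyr; a sketch using the `date` and `time` columns created when the file was read:
# Sketch: floor each timestamp to the hour, then average readings within the hour
data_1 %>%
  mutate(hour = floor_date(as.POSIXct(paste(date, time), format = "%d/%m/%Y %H:%M:%S"),
                           unit = "hour")) %>%
  group_by(hour) %>%
  summarise(WaterFlow = mean(WaterFlow))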
# cut() returns a factor, so convert back to POSIXct before binding with data_2;
# otherwise rbind() raises "invalid factor level, NA generated" warnings
data_1_new$`Date Time` <- as.POSIXct(as.character(data_1_new$`Date Time`), tz = "UTC")
total_data <- rbind(data_1_new, data_2)
total_df <- ts(total_data$WaterFlow, frequency = 24)
autoplot(total_df)
water_arima <- auto.arima(total_df, seasonal = TRUE, stepwise = FALSE)
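The assignment also asks whether the data are stationary. A quick check is ndiffs(), which estimates how many differences are needed; the d = 1 in the ARIMA order chosen below already suggests one difference is required:
# Sketch: estimated number of differences for stationarity (0 would mean stationary in level)
ndiffs(total_df)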
checkresiduals(water_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(0,0,1)[24]
## Q* = 62.53, df = 45, p-value = 0.04278
##
## Model df: 3. Total lags used: 48
write.csv(forecast(water_arima, h = 7 * 24), "water_result.csv")  # one week ahead at hourly frequency