#Load Required Libraries
suppressMessages(suppressWarnings(library(fpp2)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(dplyr)))
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(scales)))
suppressMessages(suppressWarnings(library(forecast)))
suppressMessages(suppressWarnings(library(lubridate)))
suppressMessages(suppressWarnings(library(readxl)))
suppressMessages(suppressWarnings(library(tidyr)))
suppressMessages(suppressWarnings(library(plotly)))
df <- readxl::read_excel('ATM624Data.xlsx') %>%
drop_na() %>%
spread(ATM, Cash) %>%
mutate(DATE = as.Date(DATE, origin='1899-12-30'))
atm <- ts(df %>% select(-DATE))
atm %>%
summary()
## ATM1 ATM2 ATM3 ATM4
## Min. : 1.00 Min. : 0.00 Min. : 0.0000 Min. : 1.563
## 1st Qu.: 73.00 1st Qu.: 25.50 1st Qu.: 0.0000 1st Qu.: 124.334
## Median : 91.00 Median : 67.00 Median : 0.0000 Median : 403.839
## Mean : 83.89 Mean : 62.58 Mean : 0.7206 Mean : 474.043
## 3rd Qu.:108.00 3rd Qu.: 93.00 3rd Qu.: 0.0000 3rd Qu.: 704.507
## Max. :180.00 Max. :147.00 Max. :96.0000 Max. :10919.762
## NA's :3 NA's :2
head(atm)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## ATM1 ATM2 ATM3 ATM4
## 1 96 107 0 776.99342
## 2 82 89 0 524.41796
## 3 85 90 0 792.81136
## 4 90 55 0 908.23846
## 5 99 79 0 52.83210
## 6 88 19 0 52.20845
df %>% gather(atm, Cash, -DATE) %>%
ggplot(aes(x = DATE, y = Cash, col = atm)) +
geom_line(show.legend = FALSE) +
facet_wrap(~ atm, ncol = 1, scales = "free_y") +
labs(title = "Withdrawal by all 4 ATM's", x = "Date") +
scale_y_continuous("Cash withdrawals")
Based on above graph we can observe that Atm1 and Atm2 has seasonal patterns. Atm3 has only last 3 days values and Atm4 has an outlier.
atm_1 <- atm[, "ATM1"]
atm_2 <- atm[, "ATM2"]
atm_3 <- atm[, "ATM3"]
atm_4 <- atm[, "ATM4"]
atm1 <- ts(atm_1, frequency = 7)
atm2 <- ts(atm_2, frequency = 7)
atm3 <- ts(atm_3, frequency = 7)
atm4 <- ts(atm_4, frequency = 7)
ggtsdisplay(atm1, main = "Withdrawals from ATM1")
ggtsdisplay(atm2, main = "Withdrawals from ATM2")
ggtsdisplay(atm3, main = "Withdrawals from ATM3")
ggtsdisplay(atm4, main = "Withdrawals from ATM4")
fc_atm1_ets <- ets(atm1)
autoplot(fc_atm1_ets) +
autolayer(fitted(fc_atm1_ets)) +
ylab("Cash withdrawals") + xlab("days")
summary(fc_atm1_ets)
## ETS(A,N,A)
##
## Call:
## ets(y = atm1)
##
## Smoothing parameters:
## alpha = 0.01
## gamma = 0.3803
##
## Initial states:
## l = 88.8307
## s = 1.0742 14.6617 15.5558 25.567 -57.3193 -13.1173
## 13.5778
##
## sigma: 24.476
##
## AIC AICc BIC
## 3798.046 3798.777 3835.476
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6439355 24.12043 15.54344 -112.5864 127.2582 0.8625817
## ACF1
## Training set 0.1567354
checkresiduals(fc_atm1_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 29.858, df = 5, p-value = 1.573e-05
##
## Model df: 9. Total lags used: 14
atm1_lambda <- BoxCox.lambda(atm1)
fc_arima_atm1 <- auto.arima(atm1)
summary(fc_arima_atm1)
## Series: atm1
## ARIMA(1,0,0)(2,1,0)[7]
##
## Coefficients:
## ar1 sar1 sar2
## 0.1511 -0.4698 -0.2048
## s.e. 0.0535 0.0517 0.0514
##
## sigma^2 estimated as 606.7: log likelihood=-1641.05
## AIC=3290.11 AICc=3290.22 BIC=3305.63
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.08937144 24.28804 15.37384 -98.72979 115.434 0.8697514
## ACF1
## Training set 0.009201474
checkresiduals(fc_arima_atm1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(2,1,0)[7]
## Q* = 18.297, df = 11, p-value = 0.07494
##
## Model df: 3. Total lags used: 14
fc_atm2_ets <- ets(atm2)
autoplot(fc_atm2_ets) +
autolayer(fitted(fc_atm2_ets)) +
ylab("Cash withdrawals") + xlab("days")
summary(fc_atm2_ets)
## ETS(A,N,A)
##
## Call:
## ets(y = atm2)
##
## Smoothing parameters:
## alpha = 1e-04
## gamma = 0.3912
##
## Initial states:
## l = 66.8249
## s = -15.0241 17.0779 -5.3735 12.1217 13.3698 25.1207
## -47.2925
##
## sigma: 25.0036
##
## AIC AICc BIC
## 3784.996 3785.732 3822.362
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.3870142 24.63795 17.33529 -Inf Inf 0.8774794 0.006439035
checkresiduals(fc_atm2_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 32.511, df = 5, p-value = 4.705e-06
##
## Model df: 9. Total lags used: 14
atm2_lambda <- BoxCox.lambda(atm2)
fc_arima_atm2 <- auto.arima(atm2)
summary(fc_arima_atm2)
## Series: atm2
## ARIMA(0,0,0)(2,1,0)[7]
##
## Coefficients:
## sar1 sar2
## -0.5747 -0.1766
## s.e. 0.0521 0.0518
##
## sigma^2 estimated as 652.8: log likelihood=-1659.31
## AIC=3324.61 AICc=3324.68 BIC=3336.26
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.1179362 25.23195 17.27829 -Inf Inf 0.8434248 0.01502367
checkresiduals(fc_arima_atm2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0)(2,1,0)[7]
## Q* = 28.213, df = 12, p-value = 0.005149
##
## Model df: 2. Total lags used: 14
ATM3 has only 3 values which are present only in the last 3 days. It will be hard to forecast with the limited amount of data that we have. In this case we can use the mean value.
fc_atm4_ets <- ets(atm4)
autoplot(fc_atm4_ets) +
autolayer(fitted(fc_atm4_ets)) +
ylab("Cash withdrawals") + xlab("days")
summary(fc_atm4_ets)
## ETS(M,N,A)
##
## Call:
## ets(y = atm4)
##
## Smoothing parameters:
## alpha = 0.0013
## gamma = 0.003
##
## Initial states:
## l = 372.4108
## s = -146.6545 -57.0431 217.4408 -6.2437 37.4918 5.355
## -50.3463
##
## sigma: 1.2874
##
## AIC AICc BIC
## 6690.624 6691.246 6729.623
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 77.03608 645.1182 312.3148 -510.4324 551.5781 0.7771069
## ACF1
## Training set -0.01055406
checkresiduals(fc_atm4_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,A)
## Q* = 8.1066, df = 5, p-value = 0.1505
##
## Model df: 9. Total lags used: 14
atm4_lambda <- BoxCox.lambda(atm4)
fc_arima_atm4 <- auto.arima(atm4)
summary(fc_arima_atm4)
## Series: atm4
## ARIMA(0,0,0) with non-zero mean
##
## Coefficients:
## mean
## 474.0433
## s.e. 34.0248
##
## sigma^2 estimated as 423718: log likelihood=-2882.03
## AIC=5768.06 AICc=5768.1 BIC=5775.86
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.514697e-10 650.0437 323.6001 -616.5219 646.8627 0.8051872
## ACF1
## Training set -0.009358419
checkresiduals(fc_arima_atm4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 4.6668, df = 13, p-value = 0.9818
##
## Model df: 1. Total lags used: 14
Based on the above resultswe can use ARIMA output to fit the model.
atm1_fit<-Arima(atm1, order = c(1, 0, 0), seasonal = c(2, 1, 0), lambda = atm1_lambda)
atm1_forecast<-forecast(atm1_fit, 31, level = 95)
atm2_fit<-Arima(atm2, order = c(1, 0, 0), seasonal = c(2, 1, 0), lambda = atm2_lambda)
atm2_forecast<-forecast(atm2_fit, 31, level = 95)
atm3_forecast<-meanf(atm3, 31, level = 95)
atm4_fit<-Arima(atm4, order = c(0, 0, 0), lambda = atm4_lambda)
atm4_forecast<-forecast(atm4_fit, 31, level = 95)
data_frame(DATE = rep(max(df$DATE) + 1:31, 4), ATM = rep(names(df)[-1], each = 31), Cash = c(atm1_forecast$mean, atm2_forecast$mean, atm3_forecast$mean, atm4_forecast$mean)) %>%
write_csv("DATA624_ATM_PREDICTION.csv")
power_df <- read_excel("ResidentialCustomerForecastLoad-624.xlsx")
power_df <- ts(power_df[, "KWH"], start = c(1998, 1), frequency = 12)
autoplot(power_df)
power_lambda <- BoxCox.lambda(power_df)
power_trans <- BoxCox(power_df, power_lambda)
ggtsdisplay(diff(power_trans, 12))
fc_arima_power <- auto.arima(power_trans)
summary(fc_arima_power)
## Series: power_trans
## ARIMA(2,1,1)(0,0,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2
## 0.2593 -0.1585 -0.9752 0.1980 0.2061
## s.e. 0.0743 0.0769 0.0171 0.0816 0.0674
##
## sigma^2 estimated as 1.7: log likelihood=-319.45
## AIC=650.9 AICc=651.36 BIC=670.41
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1024691 1.283068 0.796867 0.134273 1.878771 1.160183 -0.03283777
Box.test(resid(fc_arima_power), type = "L", fitdf = 3, lag = 12)
##
## Box-Ljung test
##
## data: resid(fc_arima_power)
## X-squared = 20.847, df = 9, p-value = 0.01335
The p-value 0.01335 is > 0.05 and we used lag 12 for high patterns. Based on the results fir suggests that residuals may be white noise.
power_fit <- Arima(power_df, order = c(2, 1, 1), seasonal = c(0, 0, 2), lambda = power_lambda)
ggtsdisplay(resid(power_fit), plot.type = "histogram")
power_forecast <- forecast(power_fit, 12, level = 95)
autoplot(power_forecast)
data_frame(`YYYY-MMM` = paste0(2014, "-", month.abb), KWH = power_forecast$mean) %>%
write_csv("DATA624_POWER_FORECASTING.csv")
water_1 = read_excel("Waterflow_Pipe1.xlsx",col_types =c("date", "numeric"))
water_2 = read_excel("Waterflow_Pipe2.xlsx",col_types =c("date", "numeric"))
colnames(water_1)= c("Date_time","WaterFlow")
colnames(water_2)= c("Date_Time","WaterFlow")
Water_df= water_1 %>% mutate(Date_Time = lubridate::round_date(Date_time,"hour") ) %>% select(Date_Time,WaterFlow) %>% bind_rows(water_2) %>% group_by(Date_Time) %>% summarize(WaterFlowF = mean(WaterFlow, na.rm = T))
## `summarise()` ungrouping output (override with `.groups` argument)
Water_ts = ts(Water_df$WaterFlowF,frequency = 24)
ggtsdisplay(Water_ts)
Water_Model1 = ets(Water_ts,lambda=BoxCox.lambda(Water_ts))
summary(Water_Model1)
## ETS(A,N,N)
##
## Call:
## ets(y = Water_ts, lambda = BoxCox.lambda(Water_ts))
##
## Box-Cox transformation: lambda= -0.8244
##
## Smoothing parameters:
## alpha = 0.0119
##
## Initial states:
## l = 1.1247
##
## sigma: 0.047
##
## AIC AICc BIC
## 798.1980 798.2221 812.9243
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.738182 16.76823 12.74726 -1.625442 42.41892 0.849401 0.05613023
checkresiduals(Water_Model1)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 41.298, df = 46, p-value = 0.6692
##
## Model df: 2. Total lags used: 48
autoplot(forecast(Water_Model1, 168, level = 95))
Water_Model1_Forecast = forecast(Water_Model1, 168, level=95)
Water_Model2 =auto.arima(Water_ts,lambda=BoxCox.lambda(Water_ts))
summary(Water_Model2)
## Series: Water_ts
## ARIMA(3,1,3)
## Box Cox transformation: lambda= -0.8243695
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## -0.4049 -0.9688 -0.0339 -0.5872 0.5673 -0.9475
## s.e. 0.0439 0.0255 0.0329 0.0309 0.0345 0.0236
##
## sigma^2 estimated as 0.002199: log likelihood=1642.1
## AIC=-3270.2 AICc=-3270.09 BIC=-3235.85
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.529514 16.72952 12.64518 -1.560004 41.71755 0.8425988 0.05232296
checkresiduals(Water_Model2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,3)
## Q* = 31.449, df = 42, p-value = 0.883
##
## Model df: 6. Total lags used: 48
Water_Model2_Forecast = forecast(Water_Model2, 168, level=95)
autoplot(forecast(Water_Model2, 168, level = 95))
Water_csv= data_frame(Date_Time = max(Water_df$Date_Time) + lubridate::hours(1:168),
WaterFlowF = as.numeric( Water_Model2_Forecast$mean) )
write.csv(Water_csv,"DATA624_WATERPIPE_FORECAST.csv")