## # A tibble: 5 x 2
## ATM n
## <fct> <int>
## 1 "" 14
## 2 ATM1 365
## 3 ATM2 365
## 4 ATM3 365
## 5 ATM4 365
## [1] "atm1 mean"
## [1] 83.88674
## [1] "atm2 mean"
## [1] 62.57851
## [1] "atm3 mean"
## [1] 87.66667
## [1] "atm4 mean"
## [1] 474.0137
## [1] "table for observeds"
## atm_1 atm_2 atm_4 total
## 1 49 69 83 201
## 2 22 59 50 131
## 3 131 87 65 283
## 4 85 52 20 157
## 5 15 14 23 52
## 6 2 11 15 28
## 7 54 66 108 228
## [1] "table for expecteds"
## atm_1 atm_2 atm_4 total
## 1 66.627778 66.627778 67.744444 201
## 2 43.424074 43.424074 44.151852 131
## 3 93.809259 93.809259 95.381481 283
## 4 52.042593 52.042593 52.914815 157
## 5 17.237037 17.237037 17.525926 52
## 6 9.281481 9.281481 9.437037 28
## 7 75.577778 75.577778 76.844444 228
## [1] "chi-square statistic"
## [1] 123.3003
## Series: atm_1_series.train
## ARIMA(0,0,1)(0,1,2)[7]
##
## Coefficients:
## ma1 sma1 sma2
## 0.1740 -0.5890 -0.1000
## s.e. 0.0574 0.0525 0.0519
##
## sigma^2 estimated as 587.2: log likelihood=-1552.97
## AIC=3113.95 AICc=3114.07 BIC=3129.23
## [1] 0
##
## Box-Ljung test
##
## data: atm_1_series.train
## X-squared = 216.24, df = 10, p-value < 2.2e-16
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.3357
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
## [1] "Seasonal Naive model- atm 1"
## [1] 13.47529
## [1] "Holt-Winters- atm 1"
## [1] 14.16765
## [1] "Ets model- atm 1"
## [1] 14.11364
## [1] "Arima model- atm 1"
## [1] 14.27036
## Series: atm_2_series.train
## ARIMA(2,0,2)(0,1,1)[7]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1
## -0.4404 -0.9236 0.4865 0.816 -0.7737
## s.e. 0.0527 0.0383 0.0855 0.055 0.0406
##
## sigma^2 estimated as 636.9: log likelihood=-1566.2
## AIC=3144.4 AICc=3144.65 BIC=3167.32
## [1] 1
##
## Box-Ljung test
##
## data: atm_2_series.train
## X-squared = 300.86, df = 10, p-value < 2.2e-16
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 2.307
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0143
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
## [1] "Seasonal Naive model- atm 2"
## [1] 13.02562
## [1] "Holt-Winters- atm 2"
## [1] 15.44299
## [1] "Ets model- atm 2"
## [1] 14.89989
## [1] "Arima model- atm 2"
## [1] 16.22091
## Series: atm_4_series.train
## ARIMA(0,0,0)(1,0,0)[7] with non-zero mean
##
## Coefficients:
## sar1 mean
## 0.1662 451.3334
## s.e. 0.0535 22.5110
##
## sigma^2 estimated as 122885: log likelihood=-2502.88
## AIC=5011.76 AICc=5011.83 BIC=5023.28
## [1] 0
##
## Box-Ljung test
##
## data: atm_4_series.train
## X-squared = 23.211, df = 10, p-value = 0.009995
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0762
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0193
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
## [1] "Seasonal Naive model- atm 4"
## [1] 290.4887
## [1] "Holt-Winters- atm 4"
## [1] 325.3421
## [1] "Ets model- atm 4"
## [1] 222.5605
## [1] "Arima model- atm 4"
## [1] 291.0883
atm_1_series<-atm_1_series[,3]
atm_2_series<-atm_2_series[,3]
atm_4_series<-atm_4_series[,3]
forecast_for_atm1<-snaive(atm_1_series,h=31)$mean
forecast_for_atm2<-snaive(atm_2_series,h=31)$mean
forecast_for_atm4<-hw(atm_4_series,seasonal='additive',h=31)$mean
write.csv(forecast_for_atm1,'C:/Users/dawig/Desktop/Data624/project1/atm1_predictions.csv',row.names=FALSE)
write.csv(forecast_for_atm2,'C:/Users/dawig/Desktop/Data624/project1/atm2_predictions.csv',row.names=FALSE)
write.csv(forecast_for_atm4,'C:/Users/dawig/Desktop/Data624/project1/atm4_predictions.csv',row.names=FALSE)
## Series: electricity_series.train
## ARIMA(3,0,2)(2,1,0)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 sar1 sar2 drift
## -0.5073 -0.1852 0.3714 0.9055 0.4611 -0.7697 -0.4562 8002.927
## s.e. 0.2194 0.1775 0.0892 0.2294 0.2346 0.0770 0.0836 3084.620
##
## sigma^2 estimated as 3.36e+11: log likelihood=-2468.68
## AIC=4955.37 AICc=4956.5 BIC=4983.48
## [1] "ndiffs function result:"
## [1] 1
##
## Box-Ljung test
##
## data: electricity_series.train
## X-squared = 262.93, df = 10, p-value < 2.2e-16
##
## Box-Ljung test
##
## data: lj_b_test_1_diff
## X-squared = 2.2128, df = 10, p-value = 0.9944
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 1.1194
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.0208
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
## Series: electricity_series.train
## ARIMA(3,0,2)(2,1,0)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 sar1 sar2 drift
## -0.5073 -0.1852 0.3714 0.9055 0.4611 -0.7697 -0.4562 8002.927
## s.e. 0.2194 0.1775 0.0892 0.2294 0.2346 0.0770 0.0836 3084.620
##
## sigma^2 estimated as 3.36e+11: log likelihood=-2468.68
## AIC=4955.37 AICc=4956.5 BIC=4983.48
## Series: electricity_series.train
## ARIMA(3,0,2)(2,1,0)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 sar1 sar2 drift
## -0.5073 -0.1852 0.3714 0.9055 0.4611 -0.7697 -0.4562 8002.927
## s.e. 0.2194 0.1775 0.0892 0.2294 0.2346 0.0770 0.0836 3084.620
##
## sigma^2 estimated as 3.36e+11: log likelihood=-2468.68
## AIC=4955.37 AICc=4956.5 BIC=4983.48
##
## Call:
## arima(x = electricity_series.train, order = c(0, 0, 1), seasonal = list(order = c(2,
## 1, 0), period = 12))
##
## Coefficients:
## ma1 sar1 sar2
## 0.4509 -0.6812 -0.3784
## s.e. 0.0813 0.0760 0.0832
##
## sigma^2 estimated as 3.726e+11: log likelihood = -2480.11, aic = 4968.21
## [1] "RMSE for seasonal naive model"
## [1] 1035538
## [1] "RMSE for holt winters model"
## [1] 1033156
## [1] "RMSE for ets model"
## [1] 1005935
## [1] "RMSE for ARIMA model"
## [1] 947815.6
arima(electricity_series,order = c(3,0,2),seasonal = list(order = c(2,1,0),period=12)) %>% forecast(h=12)->electricity_set
write.csv(electricity_set$mean,'C:/Users/dawig/Desktop/Data624/project1/KWH_predictions.csv',row.names=FALSE)
## # A tibble: 6 x 2
## `mean(WaterFlow)` Date.Time
## <dbl> <dttm>
## 1 21.9 2015-10-23 00:00:00
## 2 24.1 2015-10-23 01:00:00
## 3 25.4 2015-10-23 02:00:00
## 4 27.7 2015-10-23 03:00:00
## 5 23.9 2015-10-23 04:00:00
## 6 22.4 2015-10-23 05:00:00
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 8.5142
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0072
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
## ME RMSE MAE MPE
## Training set -7.458742e+03 2.378944e+05 1.727398e+04 -5.159263e-04
## Test set 5.243015e-03 5.583996e-03 5.243015e-03 3.618101e-10
## MAPE MASE ACF1 Theil's U
## Training set 1.194900e-03 2.427032e+00 0.1595954 NA
## Test set 3.618101e-10 7.366550e-07 0.7428614 4.059932e-07
## [1] "series max"
## [1] 1449158400
## [1] "series min"
## [1] 1448982000
atm_series<-read.csv(‘C:/Users/dawig/Desktop/Data624/project1/ATM624Data.csv’) atm_series %>% group_by(ATM) %>% tally() atm_1_dataset<-atm_series[atm_series$ATM==‘ATM1’,] atm_2_dataset<-atm_series[atm_series$ATM==‘ATM2’,] atm_3_dataset<-atm_series[atm_series$ATM==‘ATM3’,] atm_4_dataset<-atm_series[atm_series$ATM==‘ATM4’,]
print(‘atm1 mean’) mean(atm_1_dataset\(Cash,na.rm=TRUE) print('atm2 mean') mean(atm_2_dataset\)Cash,na.rm=TRUE) print(‘atm3 mean’) mean(atm_3_dataset\(Cash[atm_3_dataset\)Cash>1]) print(‘atm4 mean’) mean(atm_4_dataset$Cash)
atm_1_series <- ts(atm_1_dataset,frequency=365) atm_2_series <- ts(atm_2_dataset,frequency=365) atm_3_series <- ts(atm_3_dataset,frequency=365) atm_4_series <- ts(atm_4_dataset[atm_4_dataset$Cash<10000,],frequency=365) autoplot(atm_1_series[, “Cash”])+scale_x_continuous(breaks=c(1,1.1667,1.3333,1.5,1.6667,1.8333,2),labels=c(‘May 1’,‘Jul 1’,‘Sep 1’,‘’,’Jan 1’,‘Mar 1’,‘May 1’))+xlab(‘2009-2010’)+ylab(‘Atm 1’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11))
autoplot(atm_2_series[, “Cash”])+scale_x_continuous(breaks=c(1,1.1667,1.3333,1.5,1.6667,1.8333,2),labels=c(‘May 1’,‘Jul 1’,‘Sep 1’,‘’,’Jan 1’,‘Mar 1’,‘May 1’))+xlab(‘2009-2010’)+ylab(‘Atm 2’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11))
autoplot(atm_3_series[, “Cash”])+scale_x_continuous(breaks=c(1,1.1667,1.3333,1.5,1.6667,1.8333,2),labels=c(‘May 1’,‘Jul 1’,‘Sep 1’,‘’,’Jan 1’,‘Mar 1’,‘May 1’))+xlab(‘2009-2010’)+ylab(‘Atm 3’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11))
autoplot(atm_4_series[, “Cash”])+scale_x_continuous(breaks=c(1,1.1667,1.3333,1.5,1.6667,1.8333,2),labels=c(‘May 1’,‘Jul 1’,‘Sep 1’,‘’,’Jan 1’,‘Mar 1’,‘May 1’))+xlab(‘2009-2010’)+ylab(‘Atm 4’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11))
#Create category set for observeds and expecteds atm_1_dataset %>% mutate(change_pct= Cash/lag(Cash))%>% mutate(change_category=cut(change_pct,breaks=c(0,.3,.6,1,1.4,1.8,2.2,Inf),labels=c(‘a’,‘b’,‘c’,‘d’,‘e’,‘f’,‘g’)))->atm_1_dataset_b atm_1_dataset_b %>% group_by(change_category) %>% tally()->cats_for_1 #—————– atm_2_dataset %>% mutate(change_pct= Cash/lag(Cash))%>% mutate(change_category=cut(change_pct,breaks=c(0,.3,.6,1,1.4,1.8,2.2,Inf),labels=c(‘a’,‘b’,‘c’,‘d’,‘e’,‘f’,‘g’)))->atm_2_dataset_b atm_2_dataset_b %>% group_by(change_category) %>% tally()->cats_for_2 #—————– atm_4_dataset %>% mutate(change_pct= Cash/lag(Cash))%>% mutate(change_category=cut(change_pct,breaks=c(0,.3,.6,1,1.4,1.8,2.2,Inf),labels=c(‘a’,‘b’,‘c’,‘d’,‘e’,‘f’,‘g’)))->atm_4_dataset_b atm_4_dataset_b %>% group_by(change_category) %>% tally()->cats_for_4 categories_for_chi_sq<-cbind(cats_for_1[1:7,2],cats_for_2[1:7,2],cats_for_4[1:7,2]) colnames(categories_for_chi_sq)<-c(‘atm_1’,‘atm_2’,‘atm_4’) categories_for_chi_sq %>% mutate(total= rowSums(.[1:3])) -> categories_for_chi_sq categories_for_chi_sq %>% summarise_all(funs(sum))->chi_sq_col_totals categories_for_chi_sq_expecteds<-categories_for_chi_sq #—————–build table for expecteds and then calculate statistic percents_of_total_each_category<-rep(0,7) for(i in 1:7){ percents_of_total_each_category[i]<-categories_for_chi_sq\(total[i]/sum(categories_for_chi_sq\)total) } chi_sq_stat<-0 for(i in 1:7){ for(j in 1:3){ categories_for_chi_sq_expecteds[i,j]<-percents_of_total_each_category[i]*chi_sq_col_totals[j] chi_sq_stat<-chi_sq_stat+((categories_for_chi_sq[i,j]-categories_for_chi_sq_expecteds[i,j])^2/categories_for_chi_sq_expecteds[i,j]) } } print(‘table for observeds’) categories_for_chi_sq print(‘table for expecteds’) categories_for_chi_sq_expecteds print(‘chi-square statistic’) chi_sq_stat
#Split off training sets atm_1_series <- ts(atm_1_dataset,frequency=7) atm_2_series <- ts(atm_2_dataset,frequency=7) atm_4_series <- ts(atm_4_dataset[atm_4_dataset$Cash<10000,],frequency=7) atm_1_series[is.na(atm_1_series)]<-mean(atm_1_series,na.rm=TRUE) atm_2_series[is.na(atm_2_series)]<-mean(atm_2_series,na.rm=TRUE) atm_1_series.train <- window(atm_1_series[,3], end=50.1) atm_1_series.test <- window(atm_1_series[,3], start=50.1) atm_2_series.train <- window(atm_2_series[,3], end=50.1) atm_2_series.test <- window(atm_2_series[,3], start=50.1) atm_4_series.train <- window(atm_4_series[,3], end=50.1) atm_4_series.test <- window(atm_4_series[,3], start=50.1) plot.a<-gglagplot(atm_1_series.train,lags=9)+ggtitle(‘Lag plots for ATM 1’)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’‘) plot.b<-gglagplot(atm_2_series.train,lags=9)+ggtitle(’Lag plots for ATM 2’)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’‘) plot.c<-gglagplot(atm_4_series.train,lags=9)+ggtitle(’Lag plots for ATM 4’)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) grid.arrange(plot.a, plot.b,plot.c, nrow = 1)
atm_1_series_decomposed<-decompose(atm_1_series.train) autoplot(atm_1_series_decomposed)
atm_1_series_seasonal_naive<-snaive(atm_1_series.train) autoplot(atm_1_series_seasonal_naive)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(atm_1_series_seasonal_naive))
holt_winter_atm_1<-hw(atm_1_series.train,seasonal=‘additive’) autoplot(holt_winter_atm_1)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(holt_winter_atm_1))
#ets series 1 ets_atm_1<-ets(atm_1_series.train) autoplot(ets_atm_1)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
autoplot(predict(ets_atm_1))+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
tsdisplay(residuals(ets_atm_1))
auto.arima(atm_1_series.train) ndiffs(atm_1_series.train) acf(atm_1_series.train) pacf(atm_1_series.train) Box.test(atm_1_series.train, lag=10, type=“Ljung-Box”) ur.kpss(atm_1_series.train) %>% summary() roots_vis<-arima(atm_1_series.train,order = c(0,0,0),seasonal = list(order = c(0,1,1),period=7),incl=FALSE,optim.control = list(maxit = 50)) plot(roots_vis)
print(‘Seasonal Naive model- atm 1’) accuracy(forecast(atm_1_series_seasonal_naive,h=12),atm_1_series.test)[4] print(‘Holt-Winters- atm 1’) accuracy(forecast(holt_winter_atm_1,h=12),atm_1_series.test)[4] print(‘Ets model- atm 1’) accuracy(forecast(ets_atm_1,h=12),atm_1_series.test)[4] print(‘Arima model- atm 1’) accuracy(forecast(roots_vis,h=12),atm_1_series.test)[4]
atm_2_series_decomposed<-decompose(atm_2_series.train) autoplot(atm_2_series_decomposed)
atm_2_series_seasonal_naive<-snaive(atm_2_series.train) autoplot(atm_2_series_seasonal_naive)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(atm_2_series_seasonal_naive))
holt_winter_atm_2<-hw(atm_2_series.train,seasonal=‘additive’) autoplot(holt_winter_atm_2)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(holt_winter_atm_2))
#ets series 2 ets_atm_2<-ets(atm_2_series.train) autoplot(ets_atm_2)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
autoplot(predict(ets_atm_2))+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
tsdisplay(residuals(ets_atm_2))
auto.arima(atm_2_series.train) ndiffs(atm_2_series.train) acf(atm_2_series.train) pacf(atm_2_series.train) Box.test(atm_2_series.train, lag=10, type=“Ljung-Box”) ur.kpss(atm_2_series.train) %>% summary() ur.kpss(diff(atm_2_series.train)) %>% summary()
roots_vis<-arima(atm_2_series.train,order = c(2,0,2),seasonal = list(order = c(0,0,1),period=7),incl=FALSE,optim.control = list(maxit = 50)) plot(roots_vis) roots_vis<-arima(atm_2_series.train,order = c(2,0,2),seasonal = list(order = c(0,1,1),period=7),incl=FALSE,optim.control = list(maxit = 50)) plot(roots_vis)
print(‘Seasonal Naive model- atm 2’) accuracy(forecast(atm_2_series_seasonal_naive,h=12),atm_2_series.test)[4] print(‘Holt-Winters- atm 2’) accuracy(forecast(holt_winter_atm_2,h=12),atm_2_series.test)[4] print(‘Ets model- atm 2’) accuracy(forecast(ets_atm_2,h=12),atm_2_series.test)[4] print(‘Arima model- atm 2’) accuracy(forecast(roots_vis,h=12),atm_2_series.test)[4]
atm_4_series_decomposed<-decompose(atm_4_series.train) autoplot(atm_4_series_decomposed)
atm_4_series_seasonal_naive<-snaive(atm_4_series.train) autoplot(atm_4_series_seasonal_naive)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(atm_4_series_seasonal_naive))
holt_winter_atm_4<-hw(atm_4_series.train,seasonal=‘additive’) autoplot(holt_winter_atm_4)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(holt_winter_atm_4))
#ets series 4 ets_atm_4<-ets(atm_4_series.train) autoplot(ets_atm_4)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
autoplot(predict(ets_atm_4))+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
tsdisplay(residuals(ets_atm_4))
auto.arima(atm_4_series.train) ndiffs(atm_4_series.train) acf(atm_4_series.train) pacf(atm_4_series.train) Box.test(atm_4_series.train, lag=10, type=“Ljung-Box”) ur.kpss(atm_4_series.train) %>% summary() ur.kpss(diff(atm_4_series.train)) %>% summary() roots_vis<-arima(atm_4_series.train,order = c(0,0,0),seasonal = list(order = c(1,0,0),period=7),incl=FALSE,optim.control = list(maxit = 50)) plot(roots_vis)
print(‘Seasonal Naive model- atm 4’) accuracy(forecast(atm_4_series_seasonal_naive,h=12),atm_4_series.test)[4] print(‘Holt-Winters- atm 4’) accuracy(forecast(holt_winter_atm_4,h=12),atm_4_series.test)[4] print(‘Ets model- atm 4’) accuracy(forecast(ets_atm_4,h=12),atm_4_series.test)[4] print(‘Arima model- atm 4’) accuracy(forecast(roots_vis,h=12),atm_4_series.test)[4]
atm_1_series<-atm_1_series[,3] atm_2_series<-atm_2_series[,3] atm_4_series<-atm_4_series[,3] forecast_for_atm1<-snaive(atm_1_series,h=31)\(mean forecast_for_atm2<-snaive(atm_2_series,h=31)\)mean forecast_for_atm4<-hw(atm_4_series,seasonal=‘additive’,h=31)$mean write.csv(forecast_for_atm1,‘C:/Users/dawig/Desktop/Data624/project1/atm1_predictions.csv’,row.names=FALSE) write.csv(forecast_for_atm2,‘C:/Users/dawig/Desktop/Data624/project1/atm2_predictions.csv’,row.names=FALSE) write.csv(forecast_for_atm4,‘C:/Users/dawig/Desktop/Data624/project1/atm4_predictions.csv’,row.names=FALSE)
electricity_series_in<-read.csv(‘C:/Users/dawig/Desktop/Data624/project1/ResidentialCustomerForecastLoad-624.csv’) plot(electricity_series_in\(KWH) lines(electricity_series_in\)KWH) impute_value<-(electricity_series_in\(KWH) electricity_series_in\)KWH[is.na(electricity_series_in$KWH)]<-impute_value electricity_series_in\(KWH[electricity_series_in\)KWH<3000000]<-impute_value electricity_series<-ts(electricity_series_in$KWH, frequency=12) autoplot(window(electricity_series,end=7.99))+ylim(2000000,20000000) autoplot(window(electricity_series,start=8))+ylim(0,20000000)
gglagplot(electricity_series)+ggtitle(’’)
electricity_series.train <- window(electricity_series, end=15.99) electricity_series.test <- window(electricity_series, start=16) electricity_series_decomposed<-decompose(electricity_series.train) autoplot(electricity_series_decomposed) electricity_series_seasonal_naive<-snaive(electricity_series.train) autoplot(electricity_series_seasonal_naive)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(electricity_series_seasonal_naive))
holt_winter_electric<-hw(electricity_series.train,seasonal=‘additive’) autoplot(holt_winter_electric)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’) tsdisplay(residuals(holt_winter_electric))
ets_electric<-ets(electricity_series.train) autoplot(ets_electric)+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
autoplot(predict(ets_electric))+theme(panel.background = element_rect(fill = ‘#f4f4ef’),text =element_text(family=‘corben’,color=‘#249382’,size=12))+xlab(‘’)+ylab(’’)
tsdisplay(residuals(ets_electric))
auto.arima(electricity_series.train) acf(electricity_series.train) pacf(electricity_series.train) print(‘ndiffs function result:’) ndiffs(electricity_series.train) Box.test(electricity_series.train, lag=10, type=“Ljung-Box”) lj_b_test_1_diff<-residuals(arima(electricity_series.train,order = c(3,0,2),seasonal = list(order = c(2,1,0),period=12))) Box.test(lj_b_test_1_diff, lag=10, type=“Ljung-Box”) ur.kpss(electricity_series.train) %>% summary() ur.kpss(diff(electricity_series.train)) %>% summary() roots_vis<-arima(electricity_series.train,order = c(3,0,2),seasonal = list(order = c(2,1,0),period=12),incl=FALSE,optim.control = list(maxit = 50)) plot(roots_vis)
acf(diff(electricity_series.train)) pacf(diff(electricity_series.train)) electricity_arima_aicc<-auto.arima(electricity_series.train,ic = “aicc”) electricity_arima_aic<-auto.arima(electricity_series.train,ic = “aic”) electricity_arima_bic<-auto.arima(electricity_series.train,ic = “bic”) electricity_arima_bic<-arima(electricity_series.train,order = c(0,0,1),seasonal = list(order = c(2,1,0),period=12)) electricity_arima_aicc tsdisplay(residuals(electricity_arima_aicc)) electricity_arima_aic tsdisplay(residuals(electricity_arima_aic)) electricity_arima_bic tsdisplay(residuals(electricity_arima_bic))
print(‘RMSE for seasonal naive model’) accuracy(forecast(electricity_series_seasonal_naive,h=12),electricity_series.test)[4]
print(‘RMSE for holt winters model’) accuracy(forecast(holt_winter_electric,h=12),electricity_series.test)[4]
print(‘RMSE for ets model’) accuracy(forecast(ets_electric,h=12),electricity_series.test)[4]
print(‘RMSE for ARIMA model’) accuracy(forecast(electricity_arima_aic,h=12),electricity_series.test)[4]
sn_forecast<-matrix(unlist(forecast(electricity_series_seasonal_naive,h=4)[8])) sn_forecast_low80<-matrix(forecast(electricity_series_seasonal_naive,h=4)[10][1][1]\(lower[,1]) sn_forecast_high80<-matrix(forecast(electricity_series_seasonal_naive,h=4)[11][1][1]\)upper[,1])
ets_forecast<-matrix(unlist(forecast(ets_electric,h=4)[2])) ets_forecast_high80<-matrix(unlist(forecast(ets_electric,h=4)[5]))[1:4] ets_forecast_low80<-matrix(unlist(forecast(ets_electric,h=4)[6]))[1:4]
arima_forecast<-matrix(unlist(forecast(electricity_arima_aic,h=4)[4])) arima_forecast_low80<-matrix(unlist(forecast(electricity_arima_aic,h=4)[5]))[1:4] arima_forecast_high80<-matrix(unlist(forecast(electricity_arima_aic,h=4)[6]))[1:4]
actual_experience<-matrix(electricity_series.test[1:4])
estimate_comparison<-as.data.frame(cbind(actual_experience,sn_forecast,arima_forecast,sn_forecast_low80,arima_forecast_low80,sn_forecast_high80,arima_forecast_high80)) colnames(estimate_comparison)<-c(‘actual_experience’,‘sn_forecast’,‘arima_forecast’,‘sn_forecast_low80’,‘arima_forecast_low80’,‘sn_forecast_high80’,‘arima_forecast_high80’)
ggplot()+geom_point(aes(y=estimate_comparison\(actual_experience,x=seq_along(estimate_comparison\)actual_experience)),size=2.5)+ geom_point(aes(y=estimate_comparison\(sn_forecast,x=seq_along(estimate_comparison\)sn_forecast)),color=‘#c93669’,size=2.5,shape=17)+ geom_point(aes(y=estimate_comparison\(arima_forecast,x=seq_along(estimate_comparison\)arima_forecast)),color=‘#289b8c’,size=2.5,shape=12)+ geom_point(aes(y=estimate_comparison\(sn_forecast_low80,x=seq_along(estimate_comparison\)sn_forecast_low80)),color=‘#f296b6’,size=2,shape=17)+ geom_point(aes(y=estimate_comparison\(arima_forecast_low80,x=seq_along(estimate_comparison\)arima_forecast_low80)),color=‘#a3f7ec’,size=2,shape=12)+ geom_point(aes(y=estimate_comparison\(sn_forecast_high80,x=seq_along(estimate_comparison\)sn_forecast_high80)),color=‘#f296b6’,size=2,shape=17)+ geom_point(aes(y=estimate_comparison\(arima_forecast_high80,x=seq_along(estimate_comparison\)arima_forecast_high80)),color=‘#a3f7ec’,size=2,shape=12)+ xlab(‘square:best model ….. diamond:worst model’)+ylab(‘first 4 estimates’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),text =element_text(family=‘corben’,color=‘#249382’,size=12))
arima(electricity_series,order = c(3,0,2),seasonal = list(order = c(2,1,0),period=12)) %>% forecast(h=12)->electricity_set
write.csv(electricity_set$mean,‘C:/Users/dawig/Desktop/Data624/project1/KWH_predictions.csv’,row.names=FALSE)
##Water Pipes
pipe_1<-read.csv(‘C:/Users/dawig/Desktop/Data624/project1/Waterflow_Pipe1.csv’) pipe_2<-read.csv(‘C:/Users/dawig/Desktop/Data624/project1/Waterflow_Pipe2.csv’) pipes_matrix<-as.matrix(rbind(pipe_1,pipe_2)) pipes_matrix[,1]<-as.POSIXct(pipes_matrix[,1],format=“%m/%d/%y %H:%M %p”)
pipes_matrix<-pipes_matrix[(order(pipes_matrix[,1])),] Date.Time<-as.numeric(pipes_matrix[,1]) WaterFlow<-pipes_matrix[,2] pipes_matrix<-as.data.frame(cbind(Date.Time,WaterFlow),strings.as.factors=FALSE ) pipes_matrix[,1]<-as.numeric(as.character(pipes_matrix[,1])) pipes_matrix[,2]<-as.numeric(as.character(pipes_matrix[,2])) pipes_matrix %>% mutate(Date.Time.minus=Date.Time-1445576400) %>% mutate(Date.category=floor(Date.Time.minus/3600)) ->pipes_matrix
pipes_matrix %>% group_by(Date.category) %>% summarise(mean(WaterFlow))->pipes_matrix pipes_matrix %>% mutate(Date.Time.helper=Date.category*3600+1445572800) %>% mutate(Date.Time= as.POSIXct(as.numeric(Date.Time.helper), origin = “1970-01-01”))->pipes_matrix pipes_time_series<-ts(pipes_matrix[‘Date.Time’]) autoplot(pipes_time_series)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11)) head(pipes_matrix[,c(2,4)]) pipes_auto_ar<-auto.arima(pipes_time_series)
autoplot(residuals(pipes_auto_ar))+ylim(-40000,40000)+ylab(‘’)+xlab(’residuals’)+ theme(panel.background = element_rect(fill = ‘#f4f4ef’),text = element_text(family = ‘corben’,color=‘#249382’,size=14),axis.text.x = element_text(angle=20,vjust=0.8,size=11),axis.text.y = element_text(size=11)) ur.kpss(pipes_time_series) %>% summary() acf(diff(pipes_time_series)) pacf(diff(pipes_time_series))
roots_vis<-arima(pipes_time_series,order = c(5,1,2),seasonal = list(order = c(0,1,0),period=12),include.mean=TRUE,optim.control = list(maxit = 50)) plot(roots_vis)
ur.kpss(diff(pipes_time_series)) %>% summary() autoplot(residuals(roots_vis))+ylim(-60,60)+ylab(‘’)+xlab(’Residuals zoomed in’) autoplot(residuals(roots_vis))+ylim(-60000,600)+ylab(‘’)+xlab(’Residuals medium zoom’) autoplot(residuals(roots_vis))+ylab(‘’)+xlab(’Residuals’) fit.series<-window(pipes_time_series,end=480) test.series<-window(pipes_time_series,start=480) pipes_arima_to_test<-arima(fit.series,order = c(5,1,2),seasonal = list(order = c(0,1,0),period=12),include.mean=TRUE,optim.control = list(maxit = 50)) accuracy(forecast(pipes_arima_to_test,h=25),test.series) print(‘series max’) max(test.series) print(‘series min’) min(test.series) pipes_final_forecast<-forecast(arima(pipes_time_series,order = c(5,1,2),seasonal = list(order = c(0,1,0),period=12),include.mean=TRUE,optim.control = list(maxit = 50)),h=168)
pipes_final_forecast<-pipes_final_forecast[4] names(pipes_final_forecast)<-‘pipe_flow_forecast’ #write.csv(pipes_final_forecast,‘C:/Users/dawig/Desktop/Data624/project1/pipes_predictions.csv’,row.names=FALSE)