library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Read the water-distribution data (one row per month) from the workbook.
# NOTE(review): this is an absolute path on one user's machine; point
# `data_path` at a project-relative location before sharing the script.
data_path <- "C:/Users/naura/Downloads/Data Gabungan.xlsx"
data <- read_excel(data_path)
data
## # A tibble: 132 × 2
## Bulan `Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga`
## <chr> <dbl>
## 1 Januari 632780
## 2 Februari 629070
## 3 Maret 570042
## 4 April 629607
## 5 Mei 621073
## 6 Juni 638495
## 7 Juli 623579
## 8 Agustus 607441
## 9 September 731410
## 10 Oktober 630958
## # ℹ 122 more rows
# Check the dimensions of the data (rows, columns)
dim(data)
## [1] 132 2
# Check the variable (column) names
names(data)
## [1] "Bulan"
## [2] "Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga"
# Check the data type of each variable
str(data)
## tibble [132 × 2] (S3: tbl_df/tbl/data.frame)
## $ Bulan : chr [1:132] "Januari" "Februari" "Maret" "April" ...
## $ Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga: num [1:132] 632780 629070 570042 629607 621073 ...
# Check for missing values: report the number of NAs per column instead of
# printing the full 132 x 2 logical matrix from is.na(), which has to be
# scanned row by row to learn anything.
# For this dataset every entry of that matrix was FALSE, i.e. both
# per-column counts are 0.
colSums(is.na(data))
# Descriptive statistics
# stat.desc() comes from pastecs. `Bulan` is a character column, so it has no
# numeric summary and its column is NA throughout the output below.
library(pastecs)
stat.desc(data)
## Bulan Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga
## nbr.val NA 1.320000e+02
## nbr.null NA 0.000000e+00
## nbr.na NA 0.000000e+00
## min NA 5.700420e+05
## max NA 1.252274e+06
## range NA 6.822320e+05
## sum NA 1.174807e+08
## median NA 8.993385e+05
## mean NA 8.900051e+05
## SE.mean NA 1.492563e+04
## CI.mean NA 2.952646e+04
## var NA 2.940623e+10
## std.dev NA 1.714824e+05
## coef.var NA 1.926758e-01
# Base-R summary of every column (five-number summary plus mean for the
# numeric column, length/class/mode for the character column)
summary(data)
## Bulan
## Length:132
## Class :character
## Mode :character
##
##
##
## Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga
## Min. : 570042
## 1st Qu.: 734183
## Median : 899339
## Mean : 890005
## 3rd Qu.:1041407
## Max. :1252274
# Variance of the distributed-water volume. var() is applied to the numeric
# column only: calling it on the whole tibble coerces the character column
# `Bulan` to NA, raises "NAs introduced by coercion" and returns a matrix
# that is NA everywhere except this one cell.
var(
  data[["Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga"]]
)
## [1] 29406227165
# Preprocessing
# Only the water-volume variable is modelled, so the month label column
# (`Bulan`) is dropped. List-style `[` indexing keeps the result a
# one-column table instead of collapsing it to a vector.
df <- data[names(data) != "Bulan"]
df
## # A tibble: 132 × 1
## `Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga`
## <dbl>
## 1 632780
## 2 629070
## 3 570042
## 4 629607
## 5 621073
## 6 638495
## 7 623579
## 8 607441
## 9 731410
## 10 630958
## # ℹ 122 more rows
# Convert the dataset into a time series object
# NOTE(review): ts() is called without `frequency`/`start`, so the series is
# indexed 1..132 with frequency 1 (see the printed header below). If the
# `Bulan` rows are consecutive calendar months, frequency = 12 would expose
# the seasonal structure -- confirm before changing, since every later
# output in this script is indexed on the frequency-1 series.
datats <- ts(df)
datats
## Time Series:
## Start = 1
## End = 132
## Frequency = 1
## Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga
## [1,] 632780
## [2,] 629070
## [3,] 570042
## [4,] 629607
## [5,] 621073
## [6,] 638495
## [7,] 623579
## [8,] 607441
## [9,] 731410
## [10,] 630958
## [11,] 649444
## [12,] 616313
## [13,] 640357
## [14,] 620395
## [15,] 605823
## [16,] 613340
## [17,] 614734
## [18,] 647256
## [19,] 623887
## [20,] 616826
## [21,] 723887
## [22,] 676394
## [23,] 694596
## [24,] 633801
## [25,] 731630
## [26,] 695709
## [27,] 639583
## [28,] 722580
## [29,] 704103
## [30,] 726268
## [31,] 703827
## [32,] 778116
## [33,] 712379
## [34,] 763305
## [35,] 800936
## [36,] 735034
## [37,] 816973
## [38,] 764562
## [39,] 714859
## [40,] 786923
## [41,] 773100
## [42,] 771532
## [43,] 755305
## [44,] 881519
## [45,] 818402
## [46,] 852078
## [47,] 855009
## [48,] 817487
## [49,] 862107
## [50,] 834915
## [51,] 799143
## [52,] 843878
## [53,] 854679
## [54,] 837788
## [55,] 908868
## [56,] 808683
## [57,] 878835
## [58,] 836492
## [59,] 877216
## [60,] 882944
## [61,] 879742
## [62,] 842617
## [63,] 814409
## [64,] 904968
## [65,] 874650
## [66,] 909340
## [67,] 923819
## [68,] 866804
## [69,] 966996
## [70,] 918675
## [71,] 959668
## [72,] 946335
## [73,] 952935
## [74,] 947639
## [75,] 879686
## [76,] 893709
## [77,] 942714
## [78,] 1009566
## [79,] 940232
## [80,] 983611
## [81,] 988971
## [82,] 946565
## [83,] 1024490
## [84,] 1004905
## [85,] 995408
## [86,] 1019358
## [87,] 957477
## [88,] 936902
## [89,] 1080399
## [90,] 1076141
## [91,] 982392
## [92,] 1022212
## [93,] 990252
## [94,] 1052996
## [95,] 1101763
## [96,] 1033717
## [97,] 1083481
## [98,] 1037793
## [99,] 1039279
## [100,] 1094504
## [101,] 1007258
## [102,] 1067597
## [103,] 1065958
## [104,] 1166006
## [105,] 1066549
## [106,] 1076189
## [107,] 1080513
## [108,] 1107277
## [109,] 1083481
## [110,] 1037793
## [111,] 1039279
## [112,] 1094504
## [113,] 1007258
## [114,] 1067597
## [115,] 1065958
## [116,] 1166006
## [117,] 1066549
## [118,] 1076189
## [119,] 1080513
## [120,] 1107277
## [121,] 1135008
## [122,] 1150182
## [123,] 1086476
## [124,] 1071669
## [125,] 1252274
## [126,] 1026599
## [127,] 1080195
## [128,] 1047791
## [129,] 1097630
## [130,] 1164977
## [131,] 1092270
## [132,] 1080454
str(datats)
## Time-Series [1:132, 1] from 1 to 132: 632780 629070 570042 629607 621073 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "Air Minum yang Disalurkan ke Pelanggan oleh PDAM Purbalingga"
# Time plot of the original series
plot.ts(datats)
# ACF of the original series. acf() draws the correlogram and invisibly
# returns the estimates, so the result is kept and printed instead of calling
# acf() a second time (which recomputed and redrew the same plot).
acf_raw <- acf(datats)
print(acf_raw)
##
## Autocorrelations of series 'datats', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 1.000 0.928 0.919 0.896 0.878 0.870 0.841 0.829 0.800 0.800 0.770 0.751 0.739
## 13 14 15 16 17 18 19 20 21
## 0.706 0.692 0.658 0.638 0.614 0.585 0.568 0.542 0.530
# PACF of the original series, computed once and then printed
pacf_raw <- pacf(datats)
print(pacf_raw)
##
## Partial autocorrelations of series 'datats', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## 0.928 0.417 0.100 0.039 0.112 -0.088 0.010 -0.088 0.146 -0.106 -0.043
## 12 13 14 15 16 17 18 19 20 21
## 0.048 -0.101 -0.042 -0.073 -0.020 0.006 -0.084 0.047 0.019 0.022
# First-order differencing. The time plot of the original water-volume
# series does not fluctuate around a constant level and its ACF decays
# slowly, so the series is treated as non-stationary (in mean and variance,
# per the original analysis) and differenced once.
datadiff1 <- diff(datats, differences = 1)
# Time plot of the differenced series
plot.ts(datadiff1)
# ACF of the differenced series, computed once (plot drawn here) and then
# printed. Original note: "4 lags" -- presumably the number of spikes
# outside the confidence bounds; verify against the plot.
acf_diff <- acf(datadiff1)
print(acf_diff)
##
## Autocorrelations of series 'datadiff1', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 -0.549 0.054 0.010 -0.072 0.170 -0.131 0.040 -0.109 0.195 -0.125
## 11 12 13 14 15 16 17 18 19 20 21
## -0.064 0.190 -0.156 0.151 -0.116 -0.005 0.097 -0.119 0.096 -0.095 0.098
# PACF of the differenced series, computed once and then printed.
# Original note: "6 lags" -- presumably the number of significant spikes;
# verify against the plot.
pacf_diff <- pacf(datadiff1)
print(pacf_diff)
##
## Partial autocorrelations of series 'datadiff1', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## -0.549 -0.354 -0.235 -0.284 -0.033 -0.044 -0.022 -0.209 0.023 -0.038 -0.197
## 12 13 14 15 16 17 18 19 20 21
## 0.025 0.006 0.110 0.082 0.035 0.107 -0.036 0.013 -0.005 0.029
# Candidate ARIMA(p, 1, q) models. All use d = 1, i.e. one round of
# differencing, applied internally by arima() to the undifferenced series.
fit1 <- arima(datats, order = c(1, 1, 0))
fit2 <- arima(datats, order = c(2, 1, 0))
fit3 <- arima(datats, order = c(2, 1, 2))
fit4 <- arima(datats, order = c(3, 1, 2))
fit5 <- arima(datats, order = c(3, 1, 7))
fit6 <- arima(datats, order = c(4, 1, 2))
fit7 <- arima(datats, order = c(4, 1, 3))
fit8 <- arima(datats, order = c(4, 1, 4))
# The optimiser reports a possible convergence problem for this order (see
# the warning below), so fit9's estimates should be treated with caution.
fit9 <- arima(datats, order = c(4, 1, 6))
## Warning in arima(datats, order = c(4, 1, 6)): possible convergence problem:
## optim gave code = 1
fit12 <- arima(datats, order = c(7, 1, 3))
# Training-set accuracy measures for every candidate model, printed in the
# order the models were fitted (fit1 ... fit9, then fit12).
invisible(lapply(
  list(fit1, fit2, fit3, fit4, fit5, fit6, fit7, fit8, fit9, fit12),
  function(candidate) print(accuracy(candidate))
))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5280.469 49161.21 37787.74 0.4277071 4.211512 0.8307398 -0.2017305
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7364.218 46156.14 36092.85 0.6740884 4.02082 0.7934788 -0.1024383
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 11144.31 43888.96 33931.69 1.120711 3.778805 0.7459669 -0.08492358
## ME RMSE MAE MPE MAPE MASE
## Training set 9957.985 44357.17 33719.01 0.9803186 3.784465 0.7412914
## ACF1
## Training set -0.06540423
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9851.656 42165.21 31524.56 0.9846429 3.5061 0.6930478 -0.06454883
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9202.371 43673.31 33100.9 0.8968647 3.689713 0.7277026 -0.07355099
## ME RMSE MAE MPE MAPE MASE
## Training set 9879.004 43784.45 33513.26 0.9770614 3.743147 0.7367681
## ACF1
## Training set -0.06356928
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 10681.51 43407.28 32916.63 1.071644 3.657218 0.7236515 -0.07698961
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2703.43 40493.61 30834.76 0.2517745 3.46232 0.6778829 -0.02390988
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2105.771 40699.82 30768.24 0.1709919 3.45793 0.6764205 0.01450907
# Selected model: fit12, ARIMA(7,1,3). Among the candidates it has the lowest
# MAE (30768.24), MAPE (3.45793) and MASE (0.6764205). Its RMSE (40699.82) is
# second to fit9 (40493.61), whose fit raised a convergence warning.
# tsdiag() plots the standardized residuals, their ACF and the Ljung-Box
# p-values used to judge whether the residuals behave like white noise.
# (forecast is already attached at the top of the script, so it is not
# loaded again here.)
tsdiag(fit12)
fit12
##
## Call:
## arima(x = datats, order = c(7, 1, 3))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ma1 ma2
## -1.2650 -0.5906 0.5288 0.6202 0.7025 0.6184 0.3722 0.4171 -0.4216
## s.e. 0.0897 0.1334 0.1311 0.1265 0.1268 0.1298 0.0890 0.0685 0.0631
## ma3
## -0.9489
## s.e. 0.0718
##
## sigma^2 estimated as 1.669e+09: log likelihood = -1579.09, aic = 3180.17
# Variance-covariance matrix of the estimated AR and MA coefficients
vcov(fit12)
## ar1 ar2 ar3 ar4 ar5
## ar1 8.045709e-03 0.0090415667 0.0019688592 -0.0069152408 -0.0074606225
## ar2 9.041567e-03 0.0177990934 0.0108339567 -0.0061226784 -0.0153357577
## ar3 1.968859e-03 0.0108339567 0.0171979587 0.0062806001 -0.0106526157
## ar4 -6.915241e-03 -0.0061226784 0.0062806001 0.0160126093 0.0058162987
## ar5 -7.460623e-03 -0.0153357577 -0.0106526157 0.0058162987 0.0160849674
## ar6 -3.557651e-03 -0.0109162125 -0.0153732090 -0.0061631161 0.0102812063
## ar7 -1.698672e-05 -0.0031932364 -0.0078099659 -0.0073922208 0.0015811450
## ma1 -2.552352e-03 -0.0019455989 0.0005716312 0.0012314048 0.0014592614
## ma2 -6.614284e-04 -0.0021643904 -0.0017762835 -0.0009188394 -0.0001120866
## ma3 1.301866e-03 0.0004421376 -0.0029486698 -0.0026779569 -0.0015038366
## ar6 ar7 ma1 ma2 ma3
## ar1 -0.0035576511 -1.698672e-05 -0.0025523523 -0.0006614284 0.0013018657
## ar2 -0.0109162125 -3.193236e-03 -0.0019455989 -0.0021643904 0.0004421376
## ar3 -0.0153732090 -7.809966e-03 0.0005716312 -0.0017762835 -0.0029486698
## ar4 -0.0061631161 -7.392221e-03 0.0012314048 -0.0009188394 -0.0026779569
## ar5 0.0102812063 1.581145e-03 0.0014592614 -0.0001120866 -0.0015038366
## ar6 0.0168416787 8.506437e-03 -0.0003284125 0.0009494800 0.0003420195
## ar7 0.0085064368 7.928840e-03 -0.0014881726 0.0003431979 0.0019412368
## ma1 -0.0003284125 -1.488173e-03 0.0046863718 0.0018254475 -0.0014425379
## ma2 0.0009494800 3.431979e-04 0.0018254475 0.0039759842 0.0013945514
## ma3 0.0003420195 1.941237e-03 -0.0014425379 0.0013945514 0.0051584810
# Mean of the differenced series and of the original series
# NOTE(review): the differenced mean is positive (3417.359); presumably this
# is checked to judge whether a drift/constant term is needed -- confirm the
# intent, since the fitted models above include no such term.
mean(datadiff1)
## [1] 3417.359
mean(datats)
## [1] 890005.1
# Forecast from the selected model. The horizon is set explicitly to 12
# steps so it matches the predict(fit12, n.ahead = 12) call in this script
# instead of relying on forecast()'s default horizon.
dt <- forecast(fit12, h = 12)
plot(dt)
# Portmanteau test for residual autocorrelation. The default Box.test() call
# is a Box-Pierce test at lag 1 with no adjustment for estimated parameters,
# which says almost nothing about a 10-parameter model. Use the Ljung-Box
# form over 13 lags with fitdf = 10 (7 AR + 3 MA coefficients), the same
# settings checkresiduals() reports below.
Box.test(dt$residuals, lag = 13, type = "Ljung-Box", fitdf = 10)
##
## Box-Ljung test
##
## data: dt$residuals
## X-squared = 15.492, df = 3, p-value = 0.001441
# p-value < 0.05: the residuals still show significant autocorrelation, so
# the white-noise assumption is rejected for this model.
hist(dt$residuals)
checkresiduals(fit12)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(7,1,3)
## Q* = 15.492, df = 3, p-value = 0.001441
##
## Model df: 10. Total lags used: 13
# Point forecasts ($pred) and their standard errors ($se) for the next 12
# observations (time indices 133 to 144)
predict(fit12, n.ahead = 12)
## $pred
## Time Series:
## Start = 133
## End = 144
## Frequency = 1
## [1] 1088721 1139772 1099521 1126451 1120081 1094052 1156593 1112559 1126637
## [10] 1148960 1115712 1150186
##
## $se
## Time Series:
## Start = 133
## End = 144
## Frequency = 1
## [1] 40873.15 41344.92 42252.17 43176.75 45460.56 48163.87 49940.24 51891.78
## [9] 52433.21 56014.08 57186.72 58619.25