Pendahuluan
Library
library(dplyr)
library(forecast)
library(TTR)
library(graphics)
require(smooth)
require(Mcomp)
library(fpp2)
library(tseries)
library(readxl)
library(AnalyzeTS)
library(hrbrthemes)
library(tseries)
Input Data
airbersih <- read_excel("D:/Kuliah/Satria Data/Dataset/Persentase Rumah Tangga dengan Air Minum Layak (Indikator 6.1.1).xlsx", sheet=5)
head(airbersih)
## # A tibble: 6 × 2
## t `Clean Water`
## <dbl> <dbl>
## 1 1995 38.0
## 2 1996 41.2
## 3 1997 42.8
## 4 1998 42.0
## 5 1999 42.2
## 6 2000 37.5
str(airbersih)
## tibble [27 × 2] (S3: tbl_df/tbl/data.frame)
## $ t : num [1:27] 1995 1996 1997 1998 1999 ...
## $ Clean Water: num [1:27] 38 41.2 42.8 42 42.2 ...
Data Formatting
t <- as.Date(as.character(airbersih$t),"%Y")
Eksplorasi Data
plot(airbersih, main="Proporsi Tingkat Air Minum Bersih dan Layak Indonesia 1995-2021", xlab="Periode Waktu", ylab="Tingkat Air Bersih")
lines(airbersih)
DES with Transformation
Logit Transformation
a <- airbersih$`Clean Water`/100
y <- log(a/(1-a))
data <- data.frame(t=seq(1995:2021),y=y)
head(data)
## t y
## 1 1 -0.4882751
## 2 2 -0.3565292
## 3 3 -0.2916499
## 4 4 -0.3248263
## 5 5 -0.3153886
## 6 6 -0.5103990
Double Exponential Smoothing
fit <- ets(data$y,model="AAN")
fit
## ETS(A,A,N)
##
## Call:
## ets(y = data$y, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.7725
## beta = 1e-04
##
## Initial states:
## l = -0.6607
## b = 0.1067
##
## sigma: 0.2868
##
## AIC AICc BIC
## 27.22164 30.07878 33.70083
Forecasting
peramalan <- forecast(fit)
dataramal <- data.frame(t=seq(1:37),y=c(y,peramalan$mean))
dataramal$a <- exp(dataramal$y)/(1+exp(dataramal$y))
batasbawah <- exp(peramalan$lower[,2])/(1+exp(peramalan$lower[,2]))
batasatas <- exp(peramalan$upper[,2])/(1+exp(peramalan$upper[,2]))
# Plot
plot(dataramal$a, ylim=c(0,1))
lines(dataramal$a)
lines(28:37,batasbawah, col="red")
lines(28:37, batasatas, col="red")
Keakuratan
ramal.fit <- exp(peramalan$fitted)/(1+exp(peramalan$fitted))*100
par(mfrow=c(1,2))
plot(airbersih$`Clean Water`, ylim=c(0,100))
lines(airbersih$`Clean Water`)
lines(ramal.fit,col="red")
points(ramal.fit,col="red")
MAPE <- mean(abs((airbersih$`Clean Water`-ramal.fit)/airbersih$`Clean Water`),na.rm=TRUE)*100
MAPE
## [1] 6.842992
Fuzzy Time Series
Nilai Maks-Min, Banyak Kelas
minimal=min(airbersih$`Clean Water`)
minimal
## [1] 37.51
maksimal=max(airbersih$`Clean Water`)
maksimal
## [1] 90.78
n=1+3.332*log10(26)
n
## [1] 5.714691
Algoritma Fuzzy
fuzzy.ts=ts(airbersih$`Clean Water`, start=1995, frequency=1)
fuzzy.ts
## Time Series:
## Start = 1995
## End = 2021
## Frequency = 1
## [1] 38.03 41.18 42.76 41.95 42.18 37.51 48.68 48.33 47.33 48.81 47.62 47.79
## [13] 48.31 46.45 47.71 44.19 63.95 64.87 67.93 68.38 70.97 71.14 72.04 73.68
## [25] 89.27 90.21 90.78
plot(fuzzy.ts)
chen=Gfuzzy.ts1(fuzzy.ts,n=6,type="Chen",plot=TRUE,grid=TRUE)
chen
## Time Series:
## Start = 1995
## End = 2021
## Frequency = 1
## [1] NA 50.82750 50.82750 50.82750 50.82750 50.82750 50.82750 46.38833
## [9] 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833
## [17] 50.82750 68.58417 73.02333 73.02333 73.02333 73.02333 73.02333 73.02333
## [25] 86.34083 86.34083 86.34083
PE=array(NA, dim=c(n))
for(i in 1:n){
PE[i]=abs((fuzzy.ts[i]-chen[i])/fuzzy.ts[i])}
PE
## [1] NA 0.2342763 0.1886693 0.2116210 0.2050142
MAPE=mean(PE,na.rm=TRUE)
MAPE
## [1] 0.2098952
Double Moving Average
Fungsi DMA
#Membuat Fungsi DMA
DMA <- function(data, orde) {
#Mendefinisikan vektor MA Pertama
s1=c()
#Menghitung MA pertama
for (i in orde:length(data)) {
s1[i] = mean(data[(i-orde+1):i])
}
##Mendefinisikan vektor MA Kedua
s2=c()
#Menghitung MA kedua
for (j in (2*orde-1):length(data)) {
s2[j] = mean(s1[(j-orde+1):j])
}
#Mendefinisikan Konstanta dan KOefisien Slope
a= c()
b= c()
#Menghitung Konstanta dan Koefisien Slope
for (k in (2*orde-1):length(data)) {
a[k] = s1[k] + (s1[k]-s2[k])
b[k] = 2/(orde-1)*(s1[k]-s2[k])
}
#Mendefinisikan Ventor Peramalan
f =c()
#Menghitung Peramalan data
f[2*orde-1] = a[2*orde-1]
for (l in (2*orde):(length(data)+1)) {
f[l] = a[l-1]+b[l-1]
}
#Mendefinisikan Ventor Precentage Error
PE = c()
#menghitung PE
for (m in (2*orde-1):length(data)){
PE[m] = abs(data[m]-f[m])/data[m]*100
}
#Menghitung MAPE
MAPE = mean(PE, na.rm = TRUE)
Hasil_Perhitungan = data.frame(Data = data, S1 = s1, S2 = s2, a = a, b = b, Ft= f[-length(f)], PE = PE)
list (Hasil_perhitungan = Hasil_Perhitungan, MAPE = MAPE, Peramalan_1_Periode_kedepan = f[length(f)])
}
Aplikasi DMA
#DMA dengan orde 2
DMA(airbersih$`Clean Water`, 2)
## $Hasil_perhitungan
## Data S1 S2 a b Ft PE
## 1 38.03 NA NA NA NA NA NA
## 2 41.18 39.605 NA NA NA NA NA
## 3 42.76 41.970 40.7875 43.1525 2.365 43.1525 0.9179139
## 4 41.95 42.355 42.1625 42.5475 0.385 45.5175 8.5041716
## 5 42.18 42.065 42.2100 41.9200 -0.290 42.9325 1.7840209
## 6 37.51 39.845 40.9550 38.7350 -2.220 41.6300 10.9837377
## 7 48.68 43.095 41.4700 44.7200 3.250 36.5150 24.9897288
## 8 48.33 48.505 45.8000 51.2100 5.410 47.9700 0.7448790
## 9 47.33 47.830 48.1675 47.4925 -0.675 56.6200 19.6281428
## 10 48.81 48.070 47.9500 48.1900 0.240 46.8175 4.0821553
## 11 47.62 48.215 48.1425 48.2875 0.145 48.4300 1.7009660
## 12 47.79 47.705 47.9600 47.4500 -0.510 48.4325 1.3444235
## 13 48.31 48.050 47.8775 48.2225 0.345 46.9400 2.8358518
## 14 46.45 47.380 47.7150 47.0450 -0.670 48.5675 4.5586652
## 15 47.71 47.080 47.2300 46.9300 -0.300 46.3750 2.7981555
## 16 44.19 45.950 46.5150 45.3850 -1.130 46.6300 5.5216112
## 17 63.95 54.070 50.0100 58.1300 8.120 44.2550 30.7974980
## 18 64.87 64.410 59.2400 69.5800 10.340 66.2500 2.1273316
## 19 67.93 66.400 65.4050 67.3950 1.990 79.9200 17.6505226
## 20 68.38 68.155 67.2775 69.0325 1.755 69.3850 1.4697280
## 21 70.97 69.675 68.9150 70.4350 1.520 70.7875 0.2571509
## 22 71.14 71.055 70.3650 71.7450 1.380 71.9550 1.1456283
## 23 72.04 71.590 71.3225 71.8575 0.535 73.1250 1.5061077
## 24 73.68 72.860 72.2250 73.4950 1.270 72.3925 1.7474213
## 25 89.27 81.475 77.1675 85.7825 8.615 74.7650 16.2484597
## 26 90.21 89.740 85.6075 93.8725 8.265 94.3975 4.6419466
## 27 90.78 90.495 90.1175 90.8725 0.755 102.1375 12.5110156
##
## $MAPE
## [1] 7.219889
##
## $Peramalan_1_Periode_kedepan
## [1] 91.6275
Plot Perbandingan
Hasil_hitung = DMA(airbersih$`Clean Water`, 2)$Hasil_perhitungan
X = data.frame (t=airbersih$t, CleanWater = airbersih$`Clean Water`, Ft = Hasil_hitung$Ft)
X
## t CleanWater Ft
## 1 1995 38.03 NA
## 2 1996 41.18 NA
## 3 1997 42.76 43.1525
## 4 1998 41.95 45.5175
## 5 1999 42.18 42.9325
## 6 2000 37.51 41.6300
## 7 2001 48.68 36.5150
## 8 2002 48.33 47.9700
## 9 2003 47.33 56.6200
## 10 2004 48.81 46.8175
## 11 2005 47.62 48.4300
## 12 2006 47.79 48.4325
## 13 2007 48.31 46.9400
## 14 2008 46.45 48.5675
## 15 2009 47.71 46.3750
## 16 2010 44.19 46.6300
## 17 2011 63.95 44.2550
## 18 2012 64.87 66.2500
## 19 2013 67.93 79.9200
## 20 2014 68.38 69.3850
## 21 2015 70.97 70.7875
## 22 2016 71.14 71.9550
## 23 2017 72.04 73.1250
## 24 2018 73.68 72.3925
## 25 2019 89.27 74.7650
## 26 2020 90.21 94.3975
## 27 2021 90.78 102.1375
ggplot(X) +
geom_line(mapping = aes(t, Ft, color = "Peramalan"))+
geom_line(mapping = aes(t, CleanWater, color = "Data Asli"))+
geom_point(mapping = aes(t, Ft, color = "Peramalan"))+
geom_point(mapping = aes(t,CleanWater, color = "Data Asli")) +
theme_classic() +
labs(x = "Periode", y = "Tingkat Air Minum Bersih")
Double Exponential Smoothing
Pemulusan
cleanwater.ts<- ts(airbersih$`Clean Water`, start = 1, end = 26)
des.1<- HoltWinters(cleanwater.ts, gamma = FALSE)
# Plot
plot(des.1)
legend("bottomright",c("data aktual","data pemulusan"), lty=c(1,1), col=c("black","red"), cex=0.8)
Forecasting
ramalan.1<- forecast(des.1, h=10) #menampilkan ramalan 10 periode
ramalan.1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 27 93.06661 85.98416 100.1491 82.23493 103.8983
## 28 95.97969 86.79036 105.1690 81.92582 110.0336
## 29 98.89278 87.96176 109.8238 82.17522 115.6103
## 30 101.80587 89.34394 114.2678 82.74699 120.8647
## 31 104.71896 90.86653 118.5714 83.53350 125.9044
## 32 107.63204 92.49053 122.7735 84.47510 130.7890
## 33 110.54513 94.19169 126.8986 85.53470 135.5556
## 34 113.45822 95.95373 130.9627 86.68741 140.2290
## 35 116.37130 97.76512 134.9775 87.91560 144.8270
## 36 119.28439 99.61736 138.9514 89.20626 149.3625
plot(ramalan.1)
Nilai Keakuratan
SSE1= des.1$SSE
MSE1= SSE1/length(cleanwater.ts)
RMSE1 = sqrt(MSE1)
MAPE1 <- mean(abs((cleanwater.ts-des.1$fitted[,1])/cleanwater.ts),na.rm=TRUE)*100
akurasi.des1 <- matrix(c(SSE1,MSE1,RMSE1,MAPE1))
row.names(akurasi.des1)<- c("SSE", "MSE", "RMSE", "MAPE")
colnames(akurasi.des1) <- c("Akurasi")
akurasi.des1
## Akurasi
## SSE 730.569379
## MSE 28.098822
## RMSE 5.300832
## MAPE 7.808781
Pemodelan ARIMA
Training Testing
training <- airbersih[1:18,]
testing <- airbersih[19:26,]
# Data Time Series
training.ts <- ts(training$`Clean Water`)
testing.ts <- ts(testing$`Clean Water`)
Stationer Data
adf.test(training.ts)
##
## Augmented Dickey-Fuller Test
##
## data: training.ts
## Dickey-Fuller = -2.0645, Lag order = 2, p-value = 0.5478
## alternative hypothesis: stationary
acf(training.ts)
## Transformasi
library(MASS)
trainTrans <- (training$`Clean Water`)^(-1)
boxcox(trainTrans~training$t)
train.tr.dif<- diff(trainTrans,differences = 4)
acf(train.tr.dif)
adf.test(train.tr.dif)
##
## Augmented Dickey-Fuller Test
##
## data: train.tr.dif
## Dickey-Fuller = -4.3518, Lag order = 2, p-value = 0.01098
## alternative hypothesis: stationary
plot(train.tr.dif,
col = "black",
lwd = 1,
type = "o",
xlab = "Time",
ylab = "Data",
main = "PTBA")
acf(train.tr.dif, lag.max = 20)
pacf(train.tr.dif, lag.max = 20)