SIC - Forecasting

Faisal Arkan

10/23/2022

Pendahuluan

Library

library(dplyr)
library(forecast)
library(TTR)
library(graphics)
require(smooth)
require(Mcomp)
library(fpp2)
library(tseries)
library(readxl)
library(AnalyzeTS)
library(hrbrthemes)
library(tseries)

Input Data

airbersih <- read_excel("D:/Kuliah/Satria Data/Dataset/Persentase Rumah Tangga dengan Air Minum Layak (Indikator 6.1.1).xlsx", sheet=5)
head(airbersih)
## # A tibble: 6 × 2
##       t `Clean Water`
##   <dbl>         <dbl>
## 1  1995          38.0
## 2  1996          41.2
## 3  1997          42.8
## 4  1998          42.0
## 5  1999          42.2
## 6  2000          37.5
str(airbersih)
## tibble [27 × 2] (S3: tbl_df/tbl/data.frame)
##  $ t          : num [1:27] 1995 1996 1997 1998 1999 ...
##  $ Clean Water: num [1:27] 38 41.2 42.8 42 42.2 ...

Data Formatting

t <- as.Date(as.character(airbersih$t),"%Y")

Eksplorasi Data

plot(airbersih, main="Proporsi Tingkat Air Minum Bersih dan Layak Indonesia 1995-2021", xlab="Periode Waktu", ylab="Tingkat Air Bersih")
lines(airbersih)

DES with Transformation

Logit Transformation

a <- airbersih$`Clean Water`/100
y <- log(a/(1-a))
data <- data.frame(t=seq(1995:2021),y=y)
head(data)
##   t          y
## 1 1 -0.4882751
## 2 2 -0.3565292
## 3 3 -0.2916499
## 4 4 -0.3248263
## 5 5 -0.3153886
## 6 6 -0.5103990

Double Exponential Smoothing

fit <- ets(data$y,model="AAN")
fit
## ETS(A,A,N) 
## 
## Call:
##  ets(y = data$y, model = "AAN") 
## 
##   Smoothing parameters:
##     alpha = 0.7725 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = -0.6607 
##     b = 0.1067 
## 
##   sigma:  0.2868
## 
##      AIC     AICc      BIC 
## 27.22164 30.07878 33.70083

Forecasting

peramalan <- forecast(fit)
dataramal <- data.frame(t=seq(1:37),y=c(y,peramalan$mean))
dataramal$a <- exp(dataramal$y)/(1+exp(dataramal$y))
batasbawah <- exp(peramalan$lower[,2])/(1+exp(peramalan$lower[,2]))
batasatas <- exp(peramalan$upper[,2])/(1+exp(peramalan$upper[,2]))

# Plot
plot(dataramal$a, ylim=c(0,1))
lines(dataramal$a)
lines(28:37,batasbawah, col="red")
lines(28:37, batasatas, col="red")

Keakuratan

ramal.fit <- exp(peramalan$fitted)/(1+exp(peramalan$fitted))*100
par(mfrow=c(1,2))
plot(airbersih$`Clean Water`, ylim=c(0,100))
lines(airbersih$`Clean Water`)
lines(ramal.fit,col="red")
points(ramal.fit,col="red")
MAPE <- mean(abs((airbersih$`Clean Water`-ramal.fit)/airbersih$`Clean Water`),na.rm=TRUE)*100
MAPE
## [1] 6.842992

Fuzzy Time Series

Nilai Maks-Min, Banyak Kelas

minimal=min(airbersih$`Clean Water`)
 minimal
## [1] 37.51
 maksimal=max(airbersih$`Clean Water`)
 maksimal
## [1] 90.78
n=1+3.332*log10(26)
 n
## [1] 5.714691

Algoritma Fuzzy

fuzzy.ts=ts(airbersih$`Clean Water`, start=1995, frequency=1)
 fuzzy.ts
## Time Series:
## Start = 1995 
## End = 2021 
## Frequency = 1 
##  [1] 38.03 41.18 42.76 41.95 42.18 37.51 48.68 48.33 47.33 48.81 47.62 47.79
## [13] 48.31 46.45 47.71 44.19 63.95 64.87 67.93 68.38 70.97 71.14 72.04 73.68
## [25] 89.27 90.21 90.78
plot(fuzzy.ts)

 chen=Gfuzzy.ts1(fuzzy.ts,n=6,type="Chen",plot=TRUE,grid=TRUE)

 chen
## Time Series:
## Start = 1995 
## End = 2021 
## Frequency = 1 
##  [1]       NA 50.82750 50.82750 50.82750 50.82750 50.82750 50.82750 46.38833
##  [9] 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833 46.38833
## [17] 50.82750 68.58417 73.02333 73.02333 73.02333 73.02333 73.02333 73.02333
## [25] 86.34083 86.34083 86.34083
PE=array(NA, dim=c(n))
for(i in 1:n){
  PE[i]=abs((fuzzy.ts[i]-chen[i])/fuzzy.ts[i])}
PE
## [1]        NA 0.2342763 0.1886693 0.2116210 0.2050142
MAPE=mean(PE,na.rm=TRUE)
MAPE
## [1] 0.2098952

Double Moving Average

Fungsi DMA

#Membuat Fungsi DMA
DMA <- function(data, orde) {
#Mendefinisikan vektor MA Pertama
s1=c()
#Menghitung MA pertama
for (i in orde:length(data)) {
  s1[i] = mean(data[(i-orde+1):i])
}
##Mendefinisikan vektor MA Kedua
s2=c()
#Menghitung MA kedua
for (j in (2*orde-1):length(data)) {
  s2[j] = mean(s1[(j-orde+1):j])
}
#Mendefinisikan Konstanta  dan KOefisien Slope
a= c()
b= c()
#Menghitung Konstanta dan Koefisien Slope
for (k in (2*orde-1):length(data)) {
  a[k] = s1[k] + (s1[k]-s2[k])
  b[k] = 2/(orde-1)*(s1[k]-s2[k])
}
#Mendefinisikan Ventor Peramalan
f =c()
#Menghitung Peramalan data
f[2*orde-1] = a[2*orde-1] 
for (l in (2*orde):(length(data)+1)) {
  f[l] = a[l-1]+b[l-1]
}
#Mendefinisikan Ventor Precentage Error
PE = c()
#menghitung PE
for (m in (2*orde-1):length(data)){
  PE[m] = abs(data[m]-f[m])/data[m]*100
}

#Menghitung MAPE
MAPE = mean(PE, na.rm = TRUE)

Hasil_Perhitungan = data.frame(Data = data, S1 = s1, S2 = s2, a = a, b = b, Ft= f[-length(f)], PE = PE)
list (Hasil_perhitungan = Hasil_Perhitungan, MAPE = MAPE, Peramalan_1_Periode_kedepan = f[length(f)])
}

Aplikasi DMA

#DMA dengan orde 2
DMA(airbersih$`Clean Water`, 2)
## $Hasil_perhitungan
##     Data     S1      S2       a      b       Ft         PE
## 1  38.03     NA      NA      NA     NA       NA         NA
## 2  41.18 39.605      NA      NA     NA       NA         NA
## 3  42.76 41.970 40.7875 43.1525  2.365  43.1525  0.9179139
## 4  41.95 42.355 42.1625 42.5475  0.385  45.5175  8.5041716
## 5  42.18 42.065 42.2100 41.9200 -0.290  42.9325  1.7840209
## 6  37.51 39.845 40.9550 38.7350 -2.220  41.6300 10.9837377
## 7  48.68 43.095 41.4700 44.7200  3.250  36.5150 24.9897288
## 8  48.33 48.505 45.8000 51.2100  5.410  47.9700  0.7448790
## 9  47.33 47.830 48.1675 47.4925 -0.675  56.6200 19.6281428
## 10 48.81 48.070 47.9500 48.1900  0.240  46.8175  4.0821553
## 11 47.62 48.215 48.1425 48.2875  0.145  48.4300  1.7009660
## 12 47.79 47.705 47.9600 47.4500 -0.510  48.4325  1.3444235
## 13 48.31 48.050 47.8775 48.2225  0.345  46.9400  2.8358518
## 14 46.45 47.380 47.7150 47.0450 -0.670  48.5675  4.5586652
## 15 47.71 47.080 47.2300 46.9300 -0.300  46.3750  2.7981555
## 16 44.19 45.950 46.5150 45.3850 -1.130  46.6300  5.5216112
## 17 63.95 54.070 50.0100 58.1300  8.120  44.2550 30.7974980
## 18 64.87 64.410 59.2400 69.5800 10.340  66.2500  2.1273316
## 19 67.93 66.400 65.4050 67.3950  1.990  79.9200 17.6505226
## 20 68.38 68.155 67.2775 69.0325  1.755  69.3850  1.4697280
## 21 70.97 69.675 68.9150 70.4350  1.520  70.7875  0.2571509
## 22 71.14 71.055 70.3650 71.7450  1.380  71.9550  1.1456283
## 23 72.04 71.590 71.3225 71.8575  0.535  73.1250  1.5061077
## 24 73.68 72.860 72.2250 73.4950  1.270  72.3925  1.7474213
## 25 89.27 81.475 77.1675 85.7825  8.615  74.7650 16.2484597
## 26 90.21 89.740 85.6075 93.8725  8.265  94.3975  4.6419466
## 27 90.78 90.495 90.1175 90.8725  0.755 102.1375 12.5110156
## 
## $MAPE
## [1] 7.219889
## 
## $Peramalan_1_Periode_kedepan
## [1] 91.6275

Plot Perbandingan

Hasil_hitung = DMA(airbersih$`Clean Water`, 2)$Hasil_perhitungan
X = data.frame (t=airbersih$t, CleanWater = airbersih$`Clean Water`, Ft = Hasil_hitung$Ft)
X
##       t CleanWater       Ft
## 1  1995      38.03       NA
## 2  1996      41.18       NA
## 3  1997      42.76  43.1525
## 4  1998      41.95  45.5175
## 5  1999      42.18  42.9325
## 6  2000      37.51  41.6300
## 7  2001      48.68  36.5150
## 8  2002      48.33  47.9700
## 9  2003      47.33  56.6200
## 10 2004      48.81  46.8175
## 11 2005      47.62  48.4300
## 12 2006      47.79  48.4325
## 13 2007      48.31  46.9400
## 14 2008      46.45  48.5675
## 15 2009      47.71  46.3750
## 16 2010      44.19  46.6300
## 17 2011      63.95  44.2550
## 18 2012      64.87  66.2500
## 19 2013      67.93  79.9200
## 20 2014      68.38  69.3850
## 21 2015      70.97  70.7875
## 22 2016      71.14  71.9550
## 23 2017      72.04  73.1250
## 24 2018      73.68  72.3925
## 25 2019      89.27  74.7650
## 26 2020      90.21  94.3975
## 27 2021      90.78 102.1375
ggplot(X) +
  geom_line(mapping = aes(t, Ft, color = "Peramalan"))+
  geom_line(mapping = aes(t, CleanWater, color = "Data Asli"))+
  geom_point(mapping = aes(t, Ft, color = "Peramalan"))+
  geom_point(mapping = aes(t,CleanWater, color = "Data Asli")) +
  theme_classic() +
  labs(x = "Periode", y = "Tingkat Air Minum Bersih") 

Double Exponential Smoothing

Pemulusan

cleanwater.ts<- ts(airbersih$`Clean Water`, start = 1, end = 26)
des.1<- HoltWinters(cleanwater.ts, gamma = FALSE)

# Plot
plot(des.1)
legend("bottomright",c("data aktual","data pemulusan"), lty=c(1,1), col=c("black","red"), cex=0.8)

Forecasting

ramalan.1<- forecast(des.1, h=10) #menampilkan ramalan 10 periode
ramalan.1
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 27       93.06661 85.98416 100.1491 82.23493 103.8983
## 28       95.97969 86.79036 105.1690 81.92582 110.0336
## 29       98.89278 87.96176 109.8238 82.17522 115.6103
## 30      101.80587 89.34394 114.2678 82.74699 120.8647
## 31      104.71896 90.86653 118.5714 83.53350 125.9044
## 32      107.63204 92.49053 122.7735 84.47510 130.7890
## 33      110.54513 94.19169 126.8986 85.53470 135.5556
## 34      113.45822 95.95373 130.9627 86.68741 140.2290
## 35      116.37130 97.76512 134.9775 87.91560 144.8270
## 36      119.28439 99.61736 138.9514 89.20626 149.3625
plot(ramalan.1)

Nilai Keakuratan

SSE1= des.1$SSE
MSE1= SSE1/length(cleanwater.ts)
RMSE1 = sqrt(MSE1)
MAPE1 <- mean(abs((cleanwater.ts-des.1$fitted[,1])/cleanwater.ts),na.rm=TRUE)*100

akurasi.des1 <- matrix(c(SSE1,MSE1,RMSE1,MAPE1))
row.names(akurasi.des1)<- c("SSE", "MSE", "RMSE", "MAPE")
colnames(akurasi.des1) <- c("Akurasi")
akurasi.des1
##         Akurasi
## SSE  730.569379
## MSE   28.098822
## RMSE   5.300832
## MAPE   7.808781

Pemodelan ARIMA

Training Testing

training <- airbersih[1:18,]
testing <- airbersih[19:26,]

# Data Time Series
training.ts <- ts(training$`Clean Water`)
testing.ts <- ts(testing$`Clean Water`)

Stationer Data

adf.test(training.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  training.ts
## Dickey-Fuller = -2.0645, Lag order = 2, p-value = 0.5478
## alternative hypothesis: stationary
acf(training.ts)

## Transformasi

library(MASS)
trainTrans <- (training$`Clean Water`)^(-1)
boxcox(trainTrans~training$t)

train.tr.dif<- diff(trainTrans,differences = 4)
acf(train.tr.dif)

adf.test(train.tr.dif)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train.tr.dif
## Dickey-Fuller = -4.3518, Lag order = 2, p-value = 0.01098
## alternative hypothesis: stationary
plot(train.tr.dif,
     col = "black",
     lwd = 1,
     type = "o",
     xlab = "Time",
     ylab = "Data",
     main = "PTBA")

acf(train.tr.dif, lag.max = 20) 

pacf(train.tr.dif, lag.max = 20)