Tugas MPDW
Kelompok 1
Paralel 2 - MPDW 2022
Anggota:
# Berliana Apriyanti
# Kenia Mauilidia
# Mohammad Abror Gustiansyah
# Oksi Al Hadi
# Raffael Julio RogerA. Package dan Data
Library
library("forecast")
library("TTR")
library(readxl)
library("dLagM") #bisa otomatis timeseries datanya
library("dynlm") #data harus timeseries
library("MLmetrics") #MAPE
library("lmtest")
library("car")
library("knitr")
library("bsts")
library("caret")
library("forecast")
library("keras")
library("smooth")
library("tensorflow")
library("tseries")
library("TTR")
library("TSA")
library("graphics")
library("googlesheets4")
library("orcutt")IMPORT DATA
data <- read.csv("D:/Kuliah/Semester 5/MPDW/Data Fix Kelompok MPDW.csv")
head(data)## Waktu N02BA
## 1 1/12/2014 37.9
## 2 1/19/2014 45.9
## 3 1/26/2014 31.5
## 4 2/2/2014 20.7
## 5 2/9/2014 53.3
## 6 2/16/2014 47.3
Data yang digunakan merupakan data penjualan produk obat N02BA selama 149 waktu. Data tersebut merupakan data mingguan dalam rentang waktu 12 Januari 2014 hingga 13 November 2016. Dataset berasal dari Kaggle mengenai volume penjualan analgesik dan antipiretik lainnya, asam salisilat dan turunannya.Dari data tersebut akan dilakukan pemulusan dan peramalan dengan n yang paling optimal. Oleh karena itu, kami mencoba untuk mencari metode yang paling sesuai dan n yang paling optimum dengan membandingkan nilai eror dari masing-masing percobaan.
B. EKSPLORASI DATA
Membagi Data Training dan Testing
training<-data[1:120,2]
testing<-data[121:149,2]Data Time Series
training.ts<-ts(training)
testing.ts<-ts(testing)
data.ts<-ts(data$N02BA)Plot Time Series
plot(data.ts, col="red",main="Plot Penjualan Obat N02BA")
points(data.ts)plot(training.ts, col="blue",main="Plot data training")
points(training.ts)plot(testing.ts, col="blue",main="Plot data testing")
points(testing.ts)C. PEMULUSAN
pemulusan <- function(x,min,max,method=c("SMA","DMA"),
metrik=c("SSE","MSE","RMSE","MAD","MAPE")){
df.master <- data.frame()
z <- max-min+1
temp <- sqrt(z)
a <- round(temp)
b=a
if(a*b<z){
b=b+1
}
par(mfrow=c(a,b))
if(method=="SMA"){
for(i in min:max) {
sma <- TTR::SMA(x,i)
ramal <- c(NA,sma)
df <- cbind(aktual=c(x,NA),mulus=c(sma,NA),ramal)
error <- df[,1]-df[,3]
sse <- sum(error^2,na.rm=T)
mse <- mean(error^2,na.rm=T)
rmse <- sqrt(mse)
mad <- mean(abs(error),na.rm=T)
rerror <- error/df[,1]*100
mape <- mean(abs(rerror),na.rm=T)
ak <- data.frame(n=i,SSE=sse,MSE=mse,RMSE=rmse,MAD=mad,MAPE=mape)
df.master <- rbind(df.master,ak)
ts.plot(x,col="gray",main=paste("Single Moving Average n =",i))
lines(sma,col="green",lwd=2)
lines(ramal,col="red",lwd=1)
}
}
else if(method=="DMA") {
for(i in min:max) {
df_ts_sma <- TTR::SMA(x, n=i)
df_ts_dma <- TTR::SMA(df_ts_sma, n=i)
At <- 2*df_ts_sma-df_ts_dma
Bt <- df_ts_sma-df_ts_dma
pemulusan_dma <- At+Bt
ramal_dma <- c(NA, pemulusan_dma)
df_dma <- cbind(df_aktual=c(x,NA), pemulusan_dma=c(pemulusan_dma,NA), ramal_dma)
error.dma <- df_dma[, 1] - df_dma[, 3]
SSE.dma <- sum(error.dma^2, na.rm = T)
MSE.dma <- mean(error.dma^2, na.rm = T)
RMSE.dma <- sqrt(mean(error.dma^2, na.rm = T))
MAD.dma <- mean(abs(error.dma), na.rm = T)
r.error.dma <- (error.dma/df_dma[, 1])*100
MAPE.dma <- mean(abs(r.error.dma), na.rm = T)
ak <- data.frame(n=i,SSE=SSE.dma,MSE=MSE.dma,RMSE=RMSE.dma,
MAD=MAD.dma,MAPE=MAPE.dma)
df.master <- rbind(df.master,ak)
ts.plot(x,main=paste("Double Moving Average n =",i))
lines(pemulusan_dma,col="green",lwd=2)
lines(ramal_dma,col="red",lwd= 2)
}
}
opt <- df.master$n[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("n optimum adalah",opt)))
}
pemulusan2 <- function(x,alfa=NULL,beta=NULL,method=c("SES","DES"),
metrik=c("SSE","MSE","RMSE")){
df.master <- data.frame()
if(method=="SES"){
for(i in alfa){
df_ses <- HoltWinters(x, alpha = i, beta=F, gamma=F)
sse <- df_ses$SSE
mse <- sse/length(x)
rmse <-sqrt(mse)
ak <- data.frame(Alpha=i,SSE=sse,MSE=mse,RMSE=rmse)
df.master <- rbind(df.master,ak)
datases <- data.frame(x, c(NA, df_ses$fitted[,1]))
colnames(datases) = c("y","yhat")
ts.plot(x, xlab="Periode waktu", ylab="Yt", col="blue", lty=3,main=paste("Single Exponential Smoothing Alpha=",i))
points(x)
lines(datases[,2], col="red",lwd=2) #nilai dugaan
}
}
else if(method=="DES"){
for (i in alfa) {
for (j in beta) {
df_des <- HoltWinters(x, alpha = i, beta=j, gamma=F)
sse <- df_des$SSE
mse <- sse/length(x)
rmse <-sqrt(mse)
ak <- data.frame(Alpha=i,Beta=j,SSE=sse,MSE=mse,RMSE=rmse)
df.master <- rbind(df.master,ak)
datades <- data.frame(x, c(NA, NA, df_des$fitted[,1]))
colnames(datades) = c("y","yhat")
ts.plot(x,xlab="periode waktu", ylab="Yt", col="blue", lty=3,main=paste("Double Exponential Smoothing (Alpha=",i,"Beta=",j))
points(x)
lines (datades[,2], col="red",lwd=2)
}
}
}
if(method=="SES"){
opt <- df.master$Alpha[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("Alpha optimum adalah",opt)))
}
else if(method=="DES"){
opt <- df.master$Alpha[which.min(df.master[,metrik])]
opt2 <- df.master$Beta[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("Alpha optimum adalah",opt,
",Beta optimum adalah",opt2)))
}
}SMA
pemulusan(training.ts,min=2,max=10,method = "SMA",metrik = "MSE")## $Akurasi
## n SSE MSE RMSE MAD MAPE
## 1 2 7622.758 64.59965 8.037390 6.328233 20.94402
## 2 3 6887.476 58.86731 7.672504 6.216889 20.79997
## 3 4 6136.924 52.90452 7.273549 5.919043 19.69723
## 4 5 5471.900 47.58173 6.897952 5.624923 19.04803
## 5 6 5291.616 46.41768 6.813052 5.481386 18.71248
## 6 7 4999.699 44.24512 6.651701 5.373515 18.39936
## 7 8 5035.721 44.96180 6.705356 5.396749 18.52383
## 8 9 4922.811 44.34965 6.659553 5.316707 18.35737
## 9 10 4865.140 44.22855 6.650455 5.341279 18.44550
##
## $n.optimum
## [1] "n optimum adalah 10"
Parameter optimum pada data training yang didapat untuk single moving average (SMA) berdasarkan nilai RMSE terkecil adalah n = 10.
SES
pemulusan2(training.ts,alfa=c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),
method="SES",metrik="MSE")## $Akurasi
## Alpha SSE MSE RMSE
## 1 0.1 5610.589 46.75491 6.837756
## 2 0.2 5749.288 47.91073 6.921758
## 3 0.3 6032.813 50.27344 7.090377
## 4 0.4 6360.348 53.00290 7.280309
## 5 0.5 6724.638 56.03865 7.485897
## 6 0.6 7133.603 59.44669 7.710168
## 7 0.7 7599.986 63.33322 7.958217
## 8 0.8 8141.001 67.84167 8.236606
## 9 0.9 8780.077 73.16731 8.553789
##
## $n.optimum
## [1] "Alpha optimum adalah 0.1"
Parameter optimum pada data training yang didapat dari metode SES berdasarkan nilai RMSE terkecil adalah α = 0.1.
DMA
pemulusan(training.ts,min=2,max=10,method = "DMA",metrik = "MSE")## $Akurasi
## n SSE MSE RMSE MAD MAPE
## 1 2 12039.794 102.90423 10.144172 8.019850 26.72471
## 2 3 10731.613 93.31837 9.660144 7.892443 25.95314
## 3 4 9874.646 87.38625 9.348061 7.700941 25.62154
## 4 5 8668.948 78.09863 8.837343 7.152838 23.85854
## 5 6 7925.237 72.70859 8.526933 6.553893 21.92395
## 6 7 7262.449 67.87335 8.238529 6.497895 21.83775
## 7 8 7036.620 67.01543 8.186295 6.484874 21.83782
## 8 9 6462.990 62.74747 7.921330 6.259084 21.08635
## 9 10 5891.474 58.33143 7.637501 6.167388 20.95696
##
## $n.optimum
## [1] "n optimum adalah 10"
Parameter optimum pada data training yang didapat untuk double moving average (DMA) berdasarkan nilai RMSE terkecil adalah n = 10.
Berdasarkan perbandingan kedua metode, parameter optimum didapatkan nilai RMSE dari DMA lebih kecil daripada nilai RMSE SMA, sehingga metode pemulusan moving average akan dilanjutkan menggunakan metode DMA.
DES
pemulusan2(training.ts,alfa=c(seq(0.1, 0.9, by = 0.1)),
beta=c(seq(0.1, 0.9, by = 0.1)),method = "DES",metrik = "RMSE")## $Akurasi
## Alpha Beta SSE MSE RMSE
## 1 0.1 0.1 42404.946 353.37455 18.798259
## 2 0.1 0.2 25539.203 212.82669 14.588581
## 3 0.1 0.3 19416.648 161.80540 12.720275
## 4 0.1 0.4 16333.492 136.11244 11.666723
## 5 0.1 0.5 14666.253 122.21877 11.055260
## 6 0.1 0.6 13673.258 113.94381 10.674447
## 7 0.1 0.7 12902.103 107.51753 10.369066
## 8 0.1 0.8 12288.395 102.40329 10.119451
## 9 0.1 0.9 11827.426 98.56188 9.927834
## 10 0.2 0.1 15259.656 127.16380 11.276693
## 11 0.2 0.2 11075.559 92.29632 9.607098
## 12 0.2 0.3 9759.388 81.32823 9.018217
## 13 0.2 0.4 9194.515 76.62096 8.753340
## 14 0.2 0.5 8934.482 74.45402 8.628674
## 15 0.2 0.6 8855.819 73.79849 8.590605
## 16 0.2 0.7 8929.835 74.41529 8.626430
## 17 0.2 0.8 9146.058 76.21715 8.730244
## 18 0.2 0.9 9474.159 78.95132 8.885456
## 19 0.3 0.1 10436.435 86.97029 9.325786
## 20 0.3 0.2 8777.165 73.14304 8.552371
## 21 0.3 0.3 8417.543 70.14619 8.375332
## 22 0.3 0.4 8429.157 70.24297 8.381108
## 23 0.3 0.5 8630.343 71.91952 8.480538
## 24 0.3 0.6 8952.741 74.60618 8.637487
## 25 0.3 0.7 9349.320 77.91100 8.826721
## 26 0.3 0.8 9784.650 81.53875 9.029881
## 27 0.3 0.9 10231.163 85.25969 9.233617
## 28 0.4 0.1 9032.274 75.26895 8.675768
## 29 0.4 0.2 8321.417 69.34514 8.327373
## 30 0.4 0.3 8368.320 69.73600 8.350808
## 31 0.4 0.4 8646.743 72.05619 8.488592
## 32 0.4 0.5 9038.180 75.31817 8.678604
## 33 0.4 0.6 9488.954 79.07462 8.892391
## 34 0.4 0.7 9968.260 83.06883 9.114210
## 35 0.4 0.8 10461.857 87.18214 9.337138
## 36 0.4 0.9 10971.157 91.42631 9.561711
## 37 0.5 0.1 8645.009 72.04174 8.487741
## 38 0.5 0.2 8416.986 70.14155 8.375055
## 39 0.5 0.3 8690.841 72.42368 8.510210
## 40 0.5 0.4 9122.171 76.01810 8.718836
## 41 0.5 0.5 9630.185 80.25154 8.958322
## 42 0.5 0.6 10183.798 84.86499 9.212219
## 43 0.5 0.7 10773.360 89.77800 9.475125
## 44 0.5 0.8 11400.376 95.00313 9.746955
## 45 0.5 0.9 12068.562 100.57135 10.028527
## 46 0.6 0.1 8686.808 72.39007 8.508235
## 47 0.6 0.2 8759.066 72.99222 8.543548
## 48 0.6 0.3 9194.193 76.61828 8.753187
## 49 0.6 0.4 9750.641 81.25534 9.014174
## 50 0.6 0.5 10374.068 86.45057 9.297880
## 51 0.6 0.6 11046.817 92.05680 9.594624
## 52 0.6 0.7 11763.199 98.02666 9.900841
## 53 0.6 0.8 12519.490 104.32909 10.214161
## 54 0.6 0.9 13309.833 110.91527 10.531632
## 55 0.7 0.1 8969.773 74.74811 8.645699
## 56 0.7 0.2 9264.107 77.20089 8.786404
## 57 0.7 0.3 9841.620 82.01350 9.056131
## 58 0.7 0.4 10524.976 87.70813 9.365262
## 59 0.7 0.5 11275.681 93.96401 9.693503
## 60 0.7 0.6 12081.032 100.67527 10.033707
## 61 0.7 0.7 12935.309 107.79424 10.382401
## 62 0.7 0.8 13834.790 115.28992 10.737314
## 63 0.7 0.9 14777.823 123.14853 11.097231
## 64 0.8 0.1 9432.786 78.60655 8.866034
## 65 0.8 0.2 9920.968 82.67474 9.092565
## 66 0.8 0.3 10648.118 88.73431 9.419889
## 67 0.8 0.4 11480.212 95.66843 9.781024
## 68 0.8 0.5 12390.254 103.25211 10.161305
## 69 0.8 0.6 13371.458 111.42882 10.555985
## 70 0.8 0.7 14424.200 120.20167 10.963652
## 71 0.8 0.8 15553.734 129.61445 11.384834
## 72 0.8 0.9 16770.404 139.75337 11.821733
## 73 0.9 0.1 10069.174 83.90978 9.160228
## 74 0.9 0.2 10755.793 89.63160 9.467397
## 75 0.9 0.3 11664.551 97.20459 9.859239
## 76 0.9 0.4 12694.910 105.79091 10.285471
## 77 0.9 0.5 13832.955 115.27463 10.736602
## 78 0.9 0.6 15083.540 125.69617 11.211430
## 79 0.9 0.7 16461.071 137.17559 11.712198
## 80 0.9 0.8 17988.315 149.90262 12.243473
## 81 0.9 0.9 19696.931 164.14109 12.811756
##
## $n.optimum
## [1] "Alpha optimum adalah 0.4 ,Beta optimum adalah 0.2"
Parameter optimum pada data training yang didapat dari metode DES berdasarkan nilia RMSE terkecil adalah α = 0.4 dan β = 0.. Berdasarkan perbandingan metrik nilai RMSE kedua metode dengan parameter optimum, metode DES dinyatakan lebih baik dibandingkan metode SES.
Pemulusan Winter
Import Data
winter <- dataMembagi data menjadi training dan testing
training<-winter[1:120,2]
testing<-winter[121:149,2]Membentuk objek time series
winter.ts<-ts(winter$N02BA, frequency = 12,)
training.ts<-ts(training, frequency = 12)
testing.ts<-ts(testing, start=121, frequency = 12)Membuat plot time series
plot(winter.ts, col="red")
points(winter.ts)plot(training.ts, col="blue")
points(training.ts)Pemulusan
aditif <- HoltWinters(training.ts, seasonal = "additive")
aditif ## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = training.ts, seasonal = "additive")
##
## Smoothing parameters:
## alpha: 0.1373686
## beta : 0.09196788
## gamma: 0.3837979
##
## Coefficients:
## [,1]
## a 30.84693085
## b -0.09977765
## s1 3.69939220
## s2 0.63958218
## s3 -4.09485416
## s4 1.09803257
## s5 3.76818680
## s6 -4.41860571
## s7 -1.23354844
## s8 3.73890017
## s9 2.24849264
## s10 5.66018079
## s11 3.67196213
## s12 3.74859077
Forecasting
ramalan1 <- forecast(aditif, h=23)
ramalan1## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 11 34.44655 25.21957 43.67352 20.335100 48.55799
## Feb 11 31.28696 21.95675 40.61716 17.017637 45.55628
## Mar 11 26.45274 17.00263 35.90286 12.000040 40.90545
## Apr 11 31.54585 21.95836 41.13335 16.883047 46.20866
## May 11 34.11623 24.37323 43.85923 19.215601 49.01686
## Jun 11 25.82966 15.91251 35.74680 10.662695 40.99662
## Jul 11 28.91494 18.80462 39.02526 13.452544 44.37733
## Aug 11 33.78761 23.46485 44.11037 18.000309 49.57491
## Sep 11 32.19742 21.64282 42.75203 16.055555 48.33929
## Oct 11 35.50934 24.70349 46.31518 18.983218 52.03545
## Nov 11 33.42134 22.34493 44.49774 16.481440 50.36124
## Dec 11 33.39819 22.03210 44.76428 16.015256 50.78112
## Jan 12 33.24921 20.52445 45.97398 13.788365 52.71006
## Feb 12 30.08963 17.06411 43.11514 10.168814 50.01044
## Mar 12 25.25541 11.91109 38.59974 4.847027 45.66380
## Apr 12 30.34852 16.66760 44.02944 9.425364 51.27168
## May 12 32.91890 18.88392 46.95388 11.454247 54.38355
## Jun 12 24.63233 10.22616 39.03850 2.599989 46.66467
## Jul 12 27.71761 12.92349 42.51173 5.091952 50.34326
## Aug 12 32.59028 17.39184 47.78871 9.346275 55.83428
## Sep 12 31.00009 15.38137 46.61881 7.113323 54.88686
## Oct 12 34.31200 18.25744 50.36656 9.758671 58.86534
## Nov 12 32.22401 15.71845 48.72956 6.980942 57.46707
Akurasi data training
SSE
sse1.train <- aditif$SSE
sse1.train## [1] 5575.641
Winter Multipikatif
Pemulusan
multi <- HoltWinters(training.ts,seasonal = "multiplicative")
multi## Holt-Winters exponential smoothing with trend and multiplicative seasonal component.
##
## Call:
## HoltWinters(x = training.ts, seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha: 0.1309369
## beta : 0.05785465
## gamma: 0.358739
##
## Coefficients:
## [,1]
## a 31.00149900
## b 0.01620637
## s1 1.14275686
## s2 1.03642577
## s3 0.90311012
## s4 1.05747440
## s5 1.14323365
## s6 0.88181622
## s7 0.97640703
## s8 1.12208949
## s9 1.09451834
## s10 1.20696136
## s11 1.13055355
## s12 1.12421695
Forecasting
ramalan2 <- forecast(multi, h=23)
ramalan2## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 11 35.44570 31.45932 39.43207 29.34907 41.54232
## Feb 11 32.16435 28.02059 36.30811 25.82701 38.50168
## Mar 11 28.04168 23.78103 32.30232 21.52559 34.55777
## Apr 11 32.85184 28.21882 37.48487 25.76625 39.93744
## May 11 35.53460 30.54245 40.52674 27.89977 43.16942
## Jun 11 27.42337 22.65397 32.19277 20.12920 34.71754
## Jul 11 30.38085 25.17994 35.58176 22.42674 38.33496
## Aug 11 34.93194 29.08941 40.77447 25.99656 43.86732
## Sep 11 34.09135 28.05833 40.12438 24.86464 43.31807
## Oct 11 37.61322 30.90183 44.32460 27.34904 47.87740
## Nov 11 35.25040 28.53097 41.96983 24.97392 45.52688
## Dec 11 35.07104 -241.51849 311.66058 -387.93614 458.07823
## Jan 12 35.66793 -52.68748 124.02335 -99.46001 170.79588
## Feb 12 32.36591 -52.17051 116.90233 -96.92139 161.65320
## Mar 12 28.21731 -49.30423 105.73885 -90.34166 146.77628
## Apr 12 33.05750 -62.10216 128.21715 -112.47663 178.59163
## May 12 35.75693 -71.91151 143.42536 -128.90774 200.42159
## Jun 12 27.59486 -59.25299 114.44272 -105.22747 160.41720
## Jul 12 30.57074 -69.67110 130.81258 -122.73592 183.87740
## Aug 12 35.15016 -84.73403 155.03434 -148.19688 218.49719
## Sep 12 34.30421 -87.25843 155.86685 -151.60980 220.21822
## Oct 12 37.84794 -101.26829 176.96417 -174.91198 250.60786
## Nov 12 35.47026 -99.62534 170.56587 -171.14064 242.08117
Akurasi data testing
selisih1<-as.numeric(ramalan1$mean)-as.numeric(testing.ts)## Warning in as.numeric(ramalan1$mean) - as.numeric(testing.ts): longer object
## length is not a multiple of shorter object length
selisih1## [1] -2.3034546 10.2869577 -7.2972563 -2.0541472 -0.4837706 3.5796593
## [7] 3.9149389 5.5376098 -0.4525753 1.4093352 6.3713389 8.3981899
## [13] -5.0507864 -0.5603740 -7.1445880 -4.7514789 5.9188977 -4.5676725
## [19] 2.2176071 2.2402781 10.1000929 8.8120034 -2.1259929 -1.0534546
## [25] -6.9130423 -6.3472563 -8.2041472 7.6162294 -11.4703407
SSEtesting1<-sum(selisih1^2)
selisih2<-as.numeric(ramalan2$mean)-as.numeric(testing.ts)## Warning in as.numeric(ramalan2$mean) - as.numeric(testing.ts): longer object
## length is not a multiple of shorter object length
selisih2## [1] -1.30430447 11.16434574 -5.70832423 -0.74815722 0.93459533 5.17337106
## [7] 5.38084974 6.68193607 1.44135290 3.51321600 8.20039876 10.07104435
## [13] -2.63206516 1.71590615 -4.18269057 -2.04250334 8.75692736 -1.60513644
## [19] 5.07073792 4.80015606 13.40421095 12.34794157 1.12026481 -0.05430447
## [25] -6.03565426 -4.75832423 -6.89815722 9.03459533 -9.87662894
SSEtesting2<-sum(selisih2^2)
akurasi <- matrix(c(SSEtesting1, SSEtesting2), nrow=1, ncol=2)
row.names(akurasi)<- "SSE"
colnames(akurasi) <- c("Aditif", "Multiplikatif")
akurasi## Aditif Multiplikatif
## SSE 1036.486 1222.603
Cek kestasioneran data
Plot time series dan korelogram
acf(data.ts,lag.max = 20)Augmented Dickey-Fuller Test
Kestasioneran data diuji menggunakan ADF-Test dengan hipotesis sebagai berikut:
H0 : Data tidak stasioner
H1 : Data stasioner
adf.test(training.ts) ## Warning in adf.test(training.ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: training.ts
## Dickey-Fuller = -4.3861, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Hasil di atas menunjukkan bahwa P−value hasil uji di atas kurang dari dari alpha=0.05, sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa data stasioner.
Pemodelan ARIMA
Penentuan ordo model ARIMA
#Identifikasi Model ARIMA
acf(data.ts, lag.max=20, main="ACF data penjualan obat")pacf(data.ts, lag.max=20, main="PACF data nilai penjualan obat")eacf(data.ts)## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o o o o o o o o o o
## 1 o o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 o x x o o o o o o o o o o o
## 4 x x x o o o o o o o o o o o
## 5 x o o o o o o o o o o o o o
## 6 x o o x o o o o o o o o o o
## 7 x x x o o o o o o o o o o o
Berdasarkan plot ACF, PACF, dan EACF, diperoleh model sebagai berikut: ARIMA(0,1,7), ARIMA(3,1,3)
Perbandingan Kebaikan Model
model1 <- arima(training.ts,order = c(0,1,7))
model2 <- arima(training.ts,order = c(3,1,3))
Model <- c("ARIMA (0,1,7)","ARIMA (3,1,3)")
AIC <- c(model1$aic,model2$aic)
Akurasi <- data.frame(Model,AIC)
kableExtra::kable(Akurasi)| Model | AIC |
|---|---|
| ARIMA (0,1,7) | 807.9492 |
| ARIMA (3,1,3) | 806.1931 |
Simpulan
Setelah dibandingkan, model ARIMA yang paling baik berdasarkan log likelihood dan ragam terkecil adalah ARIMA(3,1,3).
Diagnostik Model
residual <- model2$residuals
par(mfrow = c(2,2))
qqnorm(residual)
qqline(residual, col = "blue", lwd = 2)
plot(residual, type="o",
ylab = "Sisaan", xlab = "Order", main = "Sisaan Model1 vs Order")
abline(h = 0, col='red')
acf(residual)
pacf(residual)Normal Q-Q Plot Berdasarkan hasil eksplorasi di atas, terlihat bahwa banyak amatan yang di sepanjang garis qq-plot distribusi normal. Sehingga secara eksploratif, dapat disimpulkan bahwa sisaan menyebar normal.
Residual (Sisaan Model1) vs Order Berdasarkan hasil eksplorasi di atas, titik pada plot kebebasan sisaan kebanyakan di sekitar titik nol. Namun, terdapat beberapa titik amatan yang terletak cukup jauh dari titik nol. Sehingga, belum dapat disimpulkan apakah terdapat autokorelasi atau tidak.
Plot ACF dan PACF Berdasarkan hasil eksplorasi di atas, baik dari plot ACF maupun plot PACF, pada keduanya terdapat garis vertikal di lag tertentu yang melebihi tinggi garis biru horizontal. Artinya, menurut kedua plot ini, terdapat autokorelasi pada model.
Uji Formal
- Sisaan Menyebar Normal
Hipotesis:
H0: Sisaan menyebar normal
H1: Sisaan tidak menyebar normal
shapiro.test(residual)##
## Shapiro-Wilk normality test
##
## data: residual
## W = 0.98989, p-value = 0.5248
- Sisaan Saling Bebas/Tidak Terdapat Autokorelasi
Hipotesis:
H0: Tidak terdapat autokorelasi
H1: Terdapat autokorelasi
Box.test(residual, type = "Ljung")##
## Box-Ljung test
##
## data: residual
## X-squared = 0.0030305, df = 1, p-value = 0.9561
- Nilai Tengah Sisaan Sama Dengan Nol
Hipotesis:
H0: Nilai tengah sisaan sama dengan nol
H1: Nilai tengah sisaan tidak sama dengan nol
t.test(residual, mu = 0, conf.level = 0.95)##
## One Sample t-test
##
## data: residual
## t = -0.64316, df = 119, p-value = 0.5214
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -1.5963875 0.8135998
## sample estimates:
## mean of x
## -0.3913938
Overfitting
- Model ARIMA(0,1,7)
model1.a <- Arima(training.ts, order=c(0,1,7), method="ML")
summary(model1.a)## Series: training.ts
## ARIMA(0,1,7)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7
## -0.7998 -0.1855 0.0478 -0.0307 0.135 -0.1882 0.1993
## s.e. 0.0991 0.1434 0.1523 0.1796 0.192 0.1896 0.1242
##
## sigma^2 = 48.3: log likelihood = -396.97
## AIC=809.95 AICc=811.26 BIC=832.18
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2651762 6.714377 5.393173 -5.016042 18.38681 0.7803313
## ACF1
## Training set -0.02563818
lmtest::coeftest(model1.a)##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.799750 0.099118 -8.0687 7.105e-16 ***
## ma2 -0.185493 0.143389 -1.2936 0.1958
## ma3 0.047823 0.152327 0.3140 0.7536
## ma4 -0.030696 0.179611 -0.1709 0.8643
## ma5 0.134992 0.191951 0.7033 0.4819
## ma6 -0.188176 0.189585 -0.9926 0.3209
## ma7 0.199279 0.124165 1.6050 0.1085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Model ARIMA(3,1,3)
model2.a <- Arima(training.ts,order = c(3,1,3), method="ML")
summary(model2.a)## Series: training.ts
## ARIMA(3,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## -1.0267 -0.8572 0.0035 0.2299 -0.0443 -0.9134
## s.e. 0.1086 0.1262 0.1065 0.0646 0.0730 0.0604
##
## sigma^2 = 45.5: log likelihood = -395.5
## AIC=805 AICc=806.01 BIC=824.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3839393 6.54579 5.128375 -5.407119 17.46757 0.742018
## ACF1
## Training set -0.005126769
lmtest::coeftest(model2.a)##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -1.0266979 0.1085969 -9.4542 < 2.2e-16 ***
## ar2 -0.8572187 0.1261690 -6.7942 1.089e-11 ***
## ar3 0.0035031 0.1064672 0.0329 0.973752
## ma1 0.2299296 0.0646147 3.5585 0.000373 ***
## ma2 -0.0442927 0.0730473 -0.6064 0.544278
## ma3 -0.9133875 0.0604078 -15.1204 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Perbandingan Kebaikan Model dari Model Overfitting
Model <- c("ARIMA (0,1,7)", "ARIMA (3,1,3)")
AIC <- c(model1$aic,model2.a$aic)
Akurasi <- data.frame(Model,AIC)
akurasioverfitting <- kableExtra::kable(Akurasi)
model.ov_aic <- data.frame(
"Model" = c("ARIMA (0,1,7)", "ARIMA (3,1,3)"),
"AIC" = c(model1$aic,model2.a$aic)
)
model.ov_aic## Model AIC
## 1 ARIMA (0,1,7) 807.9492
## 2 ARIMA (3,1,3) 805.0039
Setelah overfitting model yang paling baik ternyata tetap ARIMA (3,1,3)