Tugas MPDW
Kelompok 1
Paralel 2 - MPDW 2022
Anggota:
# Berliana Apriyanti
# Kenia Mauilidia
# Mohammad Abror Gustiansyah
# Oksi Al Hadi
# Raffael Julio RogerA. Package dan Data
Library
library(forecast)
library(TTR)
library(readxl)
library("dLagM") #bisa otomatis timeseries datanya
library("dynlm") #data harus timeseries
library("MLmetrics") #MAPE
library("lmtest")
library("car")
library("knitr")
library("bsts")
library("caret")
library("forecast")
library("keras")
library("smooth")
library("tensorflow")
library("tseries")
library("TTR")
library("TSA")
library("graphics")
library("googlesheets4")
library("orcutt")## Warning: package 'orcutt' was built under R version 4.1.3
IMPORT DATA
data <- read.csv("D:/Kuliah/Semester 5/MPDW/Data Fix Kelompok MPDW.csv")
head(data)## Waktu N02BA
## 1 1/12/2014 37.9
## 2 1/19/2014 45.9
## 3 1/26/2014 31.5
## 4 2/2/2014 20.7
## 5 2/9/2014 53.3
## 6 2/16/2014 47.3
Data yang digunakan merupakan data penjualan produk obat N02BA selama 149 waktu. Data tersebut merupakan data mingguan dalam rentang waktu 12 Januari 2014 hingga 13 November 2016. Dataset berasal dari Kaggle mengenai volume penjualan analgesik dan antipiretik lainnya, asam salisilat dan turunannya.Dari data tersebut akan dilakukan pemulusan dan peramalan dengan n yang paling optimal. Oleh karena itu, kami mencoba untuk mencari metode yang paling sesuai dan n yang paling optimum dengan membandingkan nilai eror dari masing-masing percobaan.
B. EKSPLORASI DATA
Membagi Data Training dan Testing
training<-data[1:120,2]
testing<-data[121:149,2]Data Time Series
training.ts<-ts(training)
testing.ts<-ts(testing)
data.ts<-ts(data$N02BA)Plot Time Series
plot(data.ts, col="red",main="Plot Penjualan Obat N02BA")
points(data.ts)plot(training.ts, col="blue",main="Plot data training")
points(training.ts)plot(testing.ts, col="blue",main="Plot data testing")
points(testing.ts)C. PEMULUSAN
pemulusan <- function(x,min,max,method=c("SMA","DMA"),
metrik=c("SSE","MSE","RMSE","MAD","MAPE")){
df.master <- data.frame()
z <- max-min+1
temp <- sqrt(z)
a <- round(temp)
b=a
if(a*b<z){
b=b+1
}
par(mfrow=c(a,b))
if(method=="SMA"){
for(i in min:max) {
sma <- TTR::SMA(x,i)
ramal <- c(NA,sma)
df <- cbind(aktual=c(x,NA),mulus=c(sma,NA),ramal)
error <- df[,1]-df[,3]
sse <- sum(error^2,na.rm=T)
mse <- mean(error^2,na.rm=T)
rmse <- sqrt(mse)
mad <- mean(abs(error),na.rm=T)
rerror <- error/df[,1]*100
mape <- mean(abs(rerror),na.rm=T)
ak <- data.frame(n=i,SSE=sse,MSE=mse,RMSE=rmse,MAD=mad,MAPE=mape)
df.master <- rbind(df.master,ak)
ts.plot(x,col="gray",main=paste("Single Moving Average n =",i))
lines(sma,col="green",lwd=2)
lines(ramal,col="red",lwd=1)
}
}
else if(method=="DMA") {
for(i in min:max) {
df_ts_sma <- TTR::SMA(x, n=i)
df_ts_dma <- TTR::SMA(df_ts_sma, n=i)
At <- 2*df_ts_sma-df_ts_dma
Bt <- df_ts_sma-df_ts_dma
pemulusan_dma <- At+Bt
ramal_dma <- c(NA, pemulusan_dma)
df_dma <- cbind(df_aktual=c(x,NA), pemulusan_dma=c(pemulusan_dma,NA), ramal_dma)
error.dma <- df_dma[, 1] - df_dma[, 3]
SSE.dma <- sum(error.dma^2, na.rm = T)
MSE.dma <- mean(error.dma^2, na.rm = T)
RMSE.dma <- sqrt(mean(error.dma^2, na.rm = T))
MAD.dma <- mean(abs(error.dma), na.rm = T)
r.error.dma <- (error.dma/df_dma[, 1])*100
MAPE.dma <- mean(abs(r.error.dma), na.rm = T)
ak <- data.frame(n=i,SSE=SSE.dma,MSE=MSE.dma,RMSE=RMSE.dma,
MAD=MAD.dma,MAPE=MAPE.dma)
df.master <- rbind(df.master,ak)
ts.plot(x,main=paste("Double Moving Average n =",i))
lines(pemulusan_dma,col="green",lwd=2)
lines(ramal_dma,col="red",lwd= 2)
}
}
opt <- df.master$n[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("n optimum adalah",opt)))
}
pemulusan2 <- function(x,alfa=NULL,beta=NULL,method=c("SES","DES"),
metrik=c("SSE","MSE","RMSE")){
df.master <- data.frame()
if(method=="SES"){
for(i in alfa){
df_ses <- HoltWinters(x, alpha = i, beta=F, gamma=F)
sse <- df_ses$SSE
mse <- sse/length(x)
rmse <-sqrt(mse)
ak <- data.frame(Alpha=i,SSE=sse,MSE=mse,RMSE=rmse)
df.master <- rbind(df.master,ak)
datases <- data.frame(x, c(NA, df_ses$fitted[,1]))
colnames(datases) = c("y","yhat")
ts.plot(x, xlab="Periode waktu", ylab="Yt", col="blue", lty=3,main=paste("Single Exponential Smoothing Alpha=",i))
points(x)
lines(datases[,2], col="red",lwd=2) #nilai dugaan
}
}
else if(method=="DES"){
for (i in alfa) {
for (j in beta) {
df_des <- HoltWinters(x, alpha = i, beta=j, gamma=F)
sse <- df_des$SSE
mse <- sse/length(x)
rmse <-sqrt(mse)
ak <- data.frame(Alpha=i,Beta=j,SSE=sse,MSE=mse,RMSE=rmse)
df.master <- rbind(df.master,ak)
datades <- data.frame(x, c(NA, NA, df_des$fitted[,1]))
colnames(datades) = c("y","yhat")
ts.plot(x,xlab="periode waktu", ylab="Yt", col="blue", lty=3,main=paste("Double Exponential Smoothing (Alpha=",i,"Beta=",j))
points(x)
lines (datades[,2], col="red",lwd=2)
}
}
}
if(method=="SES"){
opt <- df.master$Alpha[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("Alpha optimum adalah",opt)))
}
else if(method=="DES"){
opt <- df.master$Alpha[which.min(df.master[,metrik])]
opt2 <- df.master$Beta[which.min(df.master[,metrik])]
return(list(Akurasi=df.master,n.optimum=paste("Alpha optimum adalah",opt,
",Beta optimum adalah",opt2)))
}
}SMA
pemulusan(training.ts,min=2,max=10,method = "SMA",metrik = "MSE")## $Akurasi
## n SSE MSE RMSE MAD MAPE
## 1 2 498256.2 4222.511 64.98085 50.57203 4.900916
## 2 3 664363.9 5678.324 75.35465 59.35897 5.748486
## 3 4 841104.7 7250.902 85.15223 69.32112 6.717538
## 4 5 1034351.0 8994.357 94.83858 78.11304 7.572912
## 5 6 1234588.2 10829.721 104.06595 86.64474 8.404090
## 6 7 1438993.4 12734.455 112.84704 95.63211 9.282839
## 7 8 1626952.7 14526.364 120.52537 103.36496 10.034748
## 8 9 1797688.0 16195.387 127.26110 109.47447 10.620948
## 9 10 1946941.2 17699.466 133.03934 113.83182 11.026980
##
## $n.optimum
## [1] "n optimum adalah 2"
Parameter optimum pada data training yang didapat untuk single moving average (SMA) berdasarkan nilai RMSE terkecil adalah n = 10.
SES
pemulusan2(training.ts,alfa=c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),
method="SES",metrik="MSE")## $Akurasi
## Alpha SSE MSE RMSE
## 1 0.1 2696841.4 22473.679 149.91224
## 2 0.2 1360141.0 11334.508 106.46365
## 3 0.3 921365.6 7678.046 87.62446
## 4 0.4 702630.2 5855.252 76.51962
## 5 0.5 571650.7 4763.756 69.01997
## 6 0.6 486998.5 4058.320 63.70495
## 7 0.7 430225.1 3585.209 59.87661
## 8 0.8 391526.5 3262.721 57.12023
## 9 0.9 365345.0 3044.542 55.17737
##
## $n.optimum
## [1] "Alpha optimum adalah 0.9"
Parameter optimum pada data training yang didapat dari metode SES berdasarkan nilai RMSE terkecil adalah α = 0.1.
DMA
pemulusan(training.ts,min=2,max=10,method = "DMA",metrik = "MSE")## $Akurasi
## n SSE MSE RMSE MAD MAPE
## 1 2 412575.0 3526.282 59.38251 48.29060 4.686212
## 2 3 568924.1 4947.166 70.33609 58.34783 5.643062
## 3 4 767609.0 6793.000 82.41966 67.92588 6.624476
## 4 5 1097801.6 9890.105 99.44900 82.92252 8.076949
## 5 6 1518597.0 13932.082 118.03424 98.26962 9.507249
## 6 7 1966220.3 18375.890 135.55770 109.61854 10.539646
## 7 8 2343240.2 22316.574 149.38733 116.84524 11.176335
## 8 9 2619640.9 25433.407 159.47855 124.98322 11.884398
## 9 10 2775126.2 27476.497 165.76036 130.42277 12.353901
##
## $n.optimum
## [1] "n optimum adalah 2"
Parameter optimum pada data training yang didapat untuk double moving average (DMA) berdasarkan nilai RMSE terkecil adalah n = 10.
Berdasarkan perbandingan kedua metode, parameter optimum didapatkan nilai RMSE dari DMA lebih kecil daripada nilai RMSE SMA, sehingga metode pemulusan moving average akan dilanjutkan menggunakan metode DMA.
DES
pemulusan2(training.ts,alfa=c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),
beta=c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),method="DES",metrik="MSE")## $Akurasi
## Alpha Beta SSE MSE RMSE
## 1 0.1 0.1 2514606.9 20955.057 144.75862
## 2 0.1 0.2 1692695.0 14105.791 118.76780
## 3 0.1 0.3 1511014.8 12591.790 112.21314
## 4 0.1 0.4 1532704.2 12772.535 113.01564
## 5 0.1 0.5 1716714.9 14305.957 119.60751
## 6 0.1 0.6 2016008.4 16800.070 129.61508
## 7 0.1 0.7 2373923.8 19782.698 140.65098
## 8 0.1 0.8 2737249.2 22810.410 151.03116
## 9 0.1 0.9 3103336.3 25861.136 160.81398
## 10 0.2 0.1 1189903.1 9915.859 99.57841
## 11 0.2 0.2 1159653.1 9663.776 98.30451
## 12 0.2 0.3 1319452.8 10995.440 104.85914
## 13 0.2 0.4 1537405.9 12811.716 113.18885
## 14 0.2 0.5 1745021.9 14541.849 120.58959
## 15 0.2 0.6 1886947.1 15724.559 125.39760
## 16 0.2 0.7 1912765.4 15939.712 126.25257
## 17 0.2 0.8 1823304.5 15194.204 123.26477
## 18 0.2 0.9 1673116.4 13942.636 118.07894
## 19 0.3 0.1 878181.0 7318.175 85.54633
## 20 0.3 0.2 933692.6 7780.772 88.20868
## 21 0.3 0.3 1030667.9 8588.899 92.67631
## 22 0.3 0.4 1084742.6 9039.521 95.07640
## 23 0.3 0.5 1071095.0 8925.792 94.47641
## 24 0.3 0.6 1008359.5 8402.996 91.66786
## 25 0.3 0.7 929579.7 7746.497 88.01419
## 26 0.3 0.8 855679.2 7130.660 84.44323
## 27 0.3 0.9 794164.9 6618.041 81.35134
## 28 0.4 0.1 695175.8 5793.131 76.11262
## 29 0.4 0.2 730202.3 6085.019 78.00653
## 30 0.4 0.3 755382.3 6294.852 79.34011
## 31 0.4 0.4 741439.0 6178.658 78.60444
## 32 0.4 0.5 702193.3 5851.611 76.49582
## 33 0.4 0.6 657947.1 5482.892 74.04655
## 34 0.4 0.7 619856.9 5165.474 71.87123
## 35 0.4 0.8 591175.7 4926.464 70.18878
## 36 0.4 0.9 571531.9 4762.765 69.01279
## 37 0.5 0.1 571675.4 4763.962 69.02146
## 38 0.5 0.2 586522.3 4887.686 69.91199
## 39 0.5 0.3 586099.0 4884.158 69.88675
## 40 0.5 0.4 566212.6 4718.438 68.69089
## 41 0.5 0.5 540677.7 4505.648 67.12412
## 42 0.5 0.6 519032.6 4325.271 65.76680
## 43 0.5 0.7 504485.5 4204.046 64.83861
## 44 0.5 0.8 497110.9 4142.591 64.36296
## 45 0.5 0.9 496045.9 4133.716 64.29398
## 46 0.6 0.1 488627.5 4071.896 63.81141
## 47 0.6 0.2 494428.8 4120.240 64.18910
## 48 0.6 0.3 489416.5 4078.471 63.86291
## 49 0.6 0.4 476260.1 3968.834 62.99868
## 50 0.6 0.5 463966.7 3866.389 62.18030
## 51 0.6 0.6 456895.7 3807.464 61.70465
## 52 0.6 0.7 455867.2 3798.893 61.63516
## 53 0.6 0.8 460316.9 3835.975 61.93524
## 54 0.6 0.9 469323.0 3911.025 62.53819
## 55 0.7 0.1 433020.7 3608.506 60.07084
## 56 0.7 0.2 436447.1 3637.059 60.30803
## 57 0.7 0.3 433556.1 3612.968 60.10797
## 58 0.7 0.4 428049.8 3567.082 59.72505
## 59 0.7 0.5 425465.8 3545.548 59.54451
## 60 0.7 0.6 427786.0 3564.883 59.70665
## 61 0.7 0.7 434908.8 3624.240 60.20166
## 62 0.7 0.8 446011.5 3716.762 60.96526
## 63 0.7 0.9 460127.4 3834.395 61.92249
## 64 0.8 0.1 395972.6 3299.772 57.44364
## 65 0.8 0.2 400172.8 3334.773 57.74750
## 66 0.8 0.3 401194.1 3343.284 57.82114
## 67 0.8 0.4 402265.2 3352.210 57.89827
## 68 0.8 0.5 406772.4 3389.770 58.22173
## 69 0.8 0.6 415528.3 3462.736 58.84502
## 70 0.8 0.7 428077.8 3567.315 59.72700
## 71 0.8 0.8 443634.3 3696.953 60.80257
## 72 0.8 0.9 461478.1 3845.651 62.01331
## 73 0.9 0.1 371965.3 3099.711 55.67505
## 74 0.9 0.2 378441.2 3153.676 56.15760
## 75 0.9 0.3 383824.1 3198.534 56.55558
## 76 0.9 0.4 390651.7 3255.431 57.05638
## 77 0.9 0.5 401058.2 3342.152 57.81134
## 78 0.9 0.6 415371.2 3461.426 58.83389
## 79 0.9 0.7 433202.7 3610.022 60.08346
## 80 0.9 0.8 454133.6 3784.447 61.51786
## 81 0.9 0.9 477992.0 3983.266 63.11312
##
## $n.optimum
## [1] "Alpha optimum adalah 0.9 ,Beta optimum adalah 0.1"
Parameter optimum pada data training yang didapat dari metode DES berdasarkan nilia RMSE terkecil adalah α = 0.4 dan β = 0..
Berdasarkan perbandingan metrik nilai RMSE kedua metode dengan parameter optimum, metode DES dinyatakan lebih baik dibandingkan metode SES.
Akurasi Data Training
Pemulusan Winter
Import Data
winter <- dataMembagi data menjadi training dan testing
training<-winter[1:120,2]
testing<-winter[121:149,2]Membentuk objek time series
winter.ts<-ts(winter$N02BA, frequency = 12,)
training.ts<-ts(training, frequency = 12)
testing.ts<-ts(testing, start=121, frequency = 12)Membuat plot time series
plot(winter.ts, col="red")
points(winter.ts)plot(training.ts, col="blue")
points(training.ts)Pemulusan
aditif <- HoltWinters(training.ts, seasonal = "additive")
aditif ## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = training.ts, seasonal = "additive")
##
## Smoothing parameters:
## alpha: 1
## beta : 0.02450413
## gamma: 0
##
## Coefficients:
## [,1]
## a 1168.0902778
## b 4.0625311
## s1 -17.2569444
## s2 -5.1736111
## s3 -22.0486111
## s4 -13.9236111
## s5 26.4930556
## s6 20.4513889
## s7 -26.2152778
## s8 -30.7986111
## s9 -27.6736111
## s10 0.4513889
## s11 58.7847222
## s12 36.9097222
Forecasting
ramalan1 <- forecast(aditif, h=23)
ramalan1## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 11 1154.896 1069.6554 1240.136 1024.5318 1285.260
## Feb 11 1171.042 1049.0075 1293.076 984.4065 1357.677
## Mar 11 1158.229 1006.9417 1309.517 926.8549 1389.604
## Apr 11 1170.417 993.6078 1347.226 900.0108 1440.823
## May 11 1214.896 1014.8422 1414.950 908.9401 1520.852
## Jun 11 1212.917 991.1580 1434.676 873.7659 1552.068
## Jul 11 1170.313 927.9565 1412.669 799.6608 1540.965
## Aug 11 1169.792 907.6681 1431.916 768.9081 1570.676
## Sep 11 1176.979 895.7272 1458.232 746.8413 1607.118
## Oct 11 1209.167 909.2887 1509.045 750.5428 1667.791
## Nov 11 1271.563 953.4589 1589.667 785.0649 1758.061
## Dec 11 1253.750 917.7430 1589.758 739.8714 1767.629
## Jan 12 1203.646 849.9963 1557.296 662.7854 1744.507
## Feb 12 1219.792 848.7116 1590.873 652.2735 1787.311
## Mar 12 1206.980 818.6408 1595.318 613.0666 1800.893
## Apr 12 1219.167 813.7094 1624.625 599.0730 1839.261
## May 12 1263.646 841.1818 1686.111 617.5426 1909.750
## Jun 12 1261.667 822.2851 1701.049 589.6902 1933.644
## Jul 12 1219.063 762.8331 1675.293 521.3195 1916.807
## Aug 12 1218.542 745.5175 1691.567 495.1133 1941.971
## Sep 12 1225.730 735.9489 1715.511 476.6745 1974.785
## Oct 12 1257.917 751.4065 1764.428 483.2758 2032.559
## Nov 12 1320.313 797.0878 1843.539 520.1090 2120.517
Akurasi data training
SSE
sse1.train <- aditif$SSE
sse1.train## [1] 476126.3
Winter Multipikatif
Pemulusan
multi <- HoltWinters(training.ts,seasonal = "multiplicative")
multi## Holt-Winters exponential smoothing with trend and multiplicative seasonal component.
##
## Call:
## HoltWinters(x = training.ts, seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha: 0.8943133
## beta : 0.02683896
## gamma: 1
##
## Coefficients:
## [,1]
## a 1194.7060651
## b 4.5064546
## s1 1.0010641
## s2 0.9930019
## s3 0.9813788
## s4 0.9854053
## s5 0.9996966
## s6 0.9988224
## s7 1.0242293
## s8 0.9999678
## s9 0.9783931
## s10 0.9590284
## s11 1.0064679
## s12 1.0086163
Forecasting
ramalan2 <- forecast(multi, h=23)
ramalan2## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 11 1200.489 1112.3216 1288.656 1065.6488 1335.328
## Feb 11 1195.295 1076.0325 1314.558 1012.8985 1377.692
## Mar 11 1185.727 1041.4704 1329.983 965.1056 1406.348
## Apr 11 1195.032 1027.0642 1363.001 938.1472 1451.918
## May 11 1216.869 1025.4464 1408.292 924.1133 1509.625
## Jun 11 1220.306 1009.1111 1431.501 897.3111 1543.301
## Jul 11 1255.962 1020.7188 1491.206 896.1883 1615.737
## Aug 11 1230.718 982.5058 1478.930 851.1102 1610.326
## Sep 11 1208.574 947.5992 1469.549 809.4475 1607.700
## Oct 11 1188.975 915.4043 1462.546 770.5846 1607.366
## Nov 11 1252.325 948.3779 1556.272 787.4780 1717.172
## Dec 11 1259.543 949.7853 1569.302 785.8092 1733.278
## Jan 12 1254.624 925.0152 1584.232 750.5310 1758.716
## Feb 12 1248.994 905.5824 1592.406 723.7911 1774.198
## Mar 12 1238.797 883.0295 1594.565 694.6973 1782.897
## Apr 12 1248.321 874.9844 1621.657 677.3521 1819.289
## May 12 1270.930 876.2352 1665.625 667.2964 1874.564
## Jun 12 1274.320 863.9323 1684.707 646.6863 1901.954
## Jul 12 1311.350 874.7003 1748.000 643.5517 1979.149
## Aug 12 1284.794 842.3204 1727.267 608.0891 1961.498
## Sep 12 1261.483 812.4738 1710.492 574.7826 1948.183
## Oct 12 1240.837 784.7056 1696.969 543.2442 1938.430
## Nov 12 1306.752 812.4250 1801.079 550.7440 2062.760
Akurasi data testing
selisih1<-as.numeric(ramalan1$mean)-as.numeric(testing.ts)## Warning in as.numeric(ramalan1$mean) - as.numeric(testing.ts): longer object
## length is not a multiple of shorter object length
selisih1## [1] -20.10414 -33.95827 -16.77074 -24.58321 19.89599 -52.08315
## [7] -19.68728 -15.20808 11.97945 44.16698 101.56284 -11.24963
## [13] -41.35376 -315.20790 -183.02037 -690.83284 -1236.35364 -1458.33277
## [19] -1210.93691 -911.45771 -1344.27018 -1212.08265 -1119.68678 -1335.10414
## [25] -1188.95827 -981.77074 -1299.58321 -1295.10401 -1167.08315
SSEtesting1<-sum(selisih1^2)
selisih2<-as.numeric(ramalan2$mean)-as.numeric(testing.ts)## Warning in as.numeric(ramalan2$mean) - as.numeric(testing.ts): longer object
## length is not a multiple of shorter object length
selisih2## [1] 2.548863e+01 -9.704722e+00 1.072688e+01 3.242345e-02 2.186906e+01
## [6] -4.469387e+01 6.596247e+01 4.571803e+01 4.357394e+01 2.397526e+01
## [11] 8.232491e+01 -5.456598e+00 9.623628e+00 -2.860057e+02 -1.512027e+02
## [16] -6.616794e+02 -1.229070e+03 -1.445680e+03 -1.118650e+03 -8.452063e+02
## [21] -1.308517e+03 -1.229163e+03 -1.133248e+03 -1.289511e+03 -1.164705e+03
## [26] -9.542731e+02 -1.274968e+03 -1.293131e+03 -1.159694e+03
SSEtesting2<-sum(selisih2^2)
akurasi <- matrix(c(SSEtesting1, SSEtesting2), nrow=1, ncol=2)
row.names(akurasi)<- "SSE"
colnames(akurasi) <- c("Aditif", "Multiplikatif")
akurasi## Aditif Multiplikatif
## SSE 20001145 19207928
Cek kestasioneran data
Plot time series dan korelogram
acf(training.ts,lag.max = 20)acf(testing.ts,lag.max = 20)Dapat dilihat dari kedua plot ACF di atas bahwa adanya tails off (meluruh menjadi nol secara asimptotik) yang mengindikasikan bahwa data tidak stasioner.
Augmented Dickey-Fuller Test
Kestasioneran data diuji menggunakan ADF-Test dengan hipotesis sebagai berikut:
H0 : Data tidak stasioner
H1 : Data stasioner
adf.test(training.ts) ##
## Augmented Dickey-Fuller Test
##
## data: training.ts
## Dickey-Fuller = -2.2608, Lag order = 4, p-value = 0.4683
## alternative hypothesis: stationary
adf.test(testing.ts) ##
## Augmented Dickey-Fuller Test
##
## data: testing.ts
## Dickey-Fuller = -1.7643, Lag order = 3, p-value = 0.6629
## alternative hypothesis: stationary
Hasil di atas menunjukkan bahwa P−value kedua hasil uji di atas lebih besar dari alpha=0.05, sehingga terima H0. Artinya, pada taraf nyata 5% tidak cukup bukti untuk menyatakan bahwa data stasioner.
Differencing
train.diff<-diff(training.ts, differences = 1) plot.ts(train.diff, lty=1, xlab="Time", ylab="Data Difference", main="Plot Difference")
points(train.diff)acf(train.diff, lag.max=20, main="data difference")Augmented Dickey-Fuller Test
Kestasioneran data diuji menggunakan ADF-Test dengan hipotesis sebagai berikut:
H0 : data hasil differencing tidak stasioner
H1 : data hasil differencing stasioner
adf.test(train.diff) #stasioner## Warning in adf.test(train.diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train.diff
## Dickey-Fuller = -4.0876, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Hasil di atas menunjukkan bahwa P−value=0.01 lebih kecil dari alpha=0.05, sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa data hasil differencing telah stasioner.
Pemodelan ARIMA
Penentuan ordo model ARIMA
#Identifikasi Model ARIMA
acf(train.diff, lag.max=20, main="ACF data penjualan obat")pacf(train.diff, lag.max=20, main="PACF data nilai penjualan obat")eacf(train.diff)## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o o x o o o o o o o
## 1 o o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 x o x o o o o o o o o o o o
## 4 x x x o o o o o o o o o o o
## 5 x o x o o o o o o o o o o o
## 6 o x x o o x o o o o o o o o
## 7 x x x x o x o o o o o o o o
Berdasarkan plot ACF, PACF, dan EACF, diperoleh model sebagai berikut: ARIMA(0,1,7), ARIMA(3,1,3)
Perbandingan Kebaikan Model
model1 <- arima(training.ts,order = c(0,1,7))
model2 <- arima(training.ts,order = c(3,1,3))
Model <- c("ARIMA (0,1,7)","ARIMA (3,1,3)")
AIC <- c(model1$aic,model2$aic)
Akurasi <- data.frame(Model,AIC)
kableExtra::kable(Akurasi)| Model | AIC |
|---|---|
| ARIMA (0,1,7) | 1291.068 |
| ARIMA (3,1,3) | 1288.836 |
Simpulan
Setelah dibandingkan, model ARIMA yang paling baik berdasarkan log likelihood dan ragam terkecil adalah ARIMA(3,1,3).
Diagnostik Model
residual <- model2$residuals
par(mfrow = c(2,2))
qqnorm(residual)
qqline(residual, col = "blue", lwd = 2)
plot(residual, type="o",
ylab = "Sisaan", xlab = "Order", main = "Sisaan Model1 vs Order")
abline(h = 0, col='red')
acf(residual)
pacf(residual)Normal Q-Q Plot Berdasarkan hasil eksplorasi di atas, terlihat bahwa banyak amatan yang di sepanjang garis qq-plot distribusi normal. Sehingga secara eksploratif, dapat disimpulkan bahwa sisaan menyebar normal.
Residual (Sisaan Model1) vs Order Berdasarkan hasil eksplorasi di atas, titik pada plot kebebasan sisaan kebanyakan di sekitar titik nol. Namun, terdapat beberapa titik amatan yang terletak cukup jauh dari titik nol. Sehingga, belum dapat disimpulkan apakah terdapat autokorelasi atau tidak.
Plot ACF dan PACF Berdasarkan hasil eksplorasi di atas, baik dari plot ACF maupun plot PACF, pada keduanya terdapat garis vertikal di lag tertentu yang melebihi tinggi garis biru horizontal. Artinya, menurut kedua plot ini, terdapat autokorelasi pada model.
Uji Formal
- Sisaan Menyebar Normal
Hipotesis:
H0: Sisaan menyebar normal
H1: Sisaan tidak menyebar normal
shapiro.test(residual)##
## Shapiro-Wilk normality test
##
## data: residual
## W = 0.99405, p-value = 0.8946
- Sisaan Saling Bebas/Tidak Terdapat Autokorelasi
Hipotesis:
H0: Tidak terdapat autokorelasi
H1: Terdapat autokorelasi
Box.test(residual, type = "Ljung")##
## Box-Ljung test
##
## data: residual
## X-squared = 0.021283, df = 1, p-value = 0.884
- Nilai Tengah Sisaan Sama Dengan Nol
Hipotesis:
H0: Nilai tengah sisaan sama dengan nol
H1: Nilai tengah sisaan tidak sama dengan nol
t.test(residual, mu = 0, conf.level = 0.95)##
## One Sample t-test
##
## data: residual
## t = 0.56778, df = 119, p-value = 0.5713
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -6.552889 11.821678
## sample estimates:
## mean of x
## 2.634395
Overfitting
- Model ARIMA(0,1,7)
model1.a <- Arima(training.ts, order=c(0,1,7), method="ML")
summary(model1.a)## Series: training.ts
## ARIMA(0,1,7)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7
## 0.1591 0.0377 0.0501 0.1844 0.0156 0.0299 -0.1573
## s.e. 0.0919 0.0932 0.0955 0.0972 0.1015 0.1058 0.0885
##
## sigma^2 = 2839: log likelihood = -638.53
## AIC=1293.07 AICc=1294.38 BIC=1315.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 3.00534 51.47632 41.76581 0.2336401 4.071661 0.2129199 0.008649975
lmtest::coeftest(model1.a)##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 0.159073 0.091861 1.7317 0.08333 .
## ma2 0.037710 0.093245 0.4044 0.68590
## ma3 0.050146 0.095495 0.5251 0.59950
## ma4 0.184361 0.097195 1.8968 0.05785 .
## ma5 0.015583 0.101535 0.1535 0.87802
## ma6 0.029936 0.105825 0.2829 0.77727
## ma7 -0.157327 0.088463 -1.7784 0.07533 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- Model ARIMA(3,1,3)
model2.a <- Arima(training.ts,order = c(3,1,3), method="ML")
summary(model2.a)## Series: training.ts
## ARIMA(3,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## 1.3344 -1.2559 0.3489 -1.2017 1.1507 -0.1422
## s.e. 0.3587 0.3671 0.3285 0.3779 0.4081 0.3841
##
## sigma^2 = 2740: log likelihood = -638.54
## AIC=1291.07 AICc=1292.08 BIC=1310.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.793475 50.79551 41.22189 0.2192547 3.999123 0.210147
## ACF1
## Training set -0.002983176
lmtest::coeftest(model2.a)##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 1.33443 0.35868 3.7204 0.0001989 ***
## ar2 -1.25587 0.36712 -3.4209 0.0006242 ***
## ar3 0.34889 0.32850 1.0621 0.2881999
## ma1 -1.20174 0.37789 -3.1801 0.0014720 **
## ma2 1.15065 0.40807 2.8197 0.0048067 **
## ma3 -0.14220 0.38407 -0.3702 0.7112086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Perbandingan Kebaikan Model dari Model Overfitting
Model <- c("ARIMA (0,1,7)", "ARIMA (3,1,3)")
AIC <- c(model1$aic,model2.a$aic)
Akurasi <- data.frame(Model,AIC)
akurasioverfitting <- kableExtra::kable(Akurasi)
model.ov_aic <- data.frame(
"Model" = c("ARIMA (0,1,7)", "ARIMA (3,1,3)"),
"AIC" = c(model1$aic,model2.a$aic)
)
model.ov_aic## Model AIC
## 1 ARIMA (0,1,7) 1291.068
## 2 ARIMA (3,1,3) 1291.073
Setelah overfitting model yang paling baik ternyata tetap ARIMA (3,1,3)