Data Preparation

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TTR)
## Warning: package 'TTR' was built under R version 4.1.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.1.3
library(lmtest) #uji-Durbin Watson
## Warning: package 'lmtest' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(orcutt) #Cochrane-Orcutt
## Warning: package 'orcutt' was built under R version 4.1.3
library(HoRM) #Hildreth Lu
## Warning: package 'HoRM' was built under R version 4.1.3

Data

dataregresi <- read_excel("C:/Users/62812/Documents/SEMESTER 5/MPDW/data.xlsx", sheet="Sheet1")
head(dataregresi)
## # A tibble: 6 x 2
##   TAHUN   IPM
##   <dbl> <dbl>
## 1  2010  76.3
## 2  2011  77.0
## 3  2012  77.5
## 4  2013  78.1
## 5  2014  78.4
## 6  2015  79.0
knitr::kable(dataregresi,align = "c")
TAHUN IPM
2010 76.31
2011 76.98
2012 77.53
2013 78.08
2014 78.39
2015 78.99
2016 79.60
2017 80.06
2018 80.47
2019 80.76
2020 80.77
2021 81.11

Data di atas merupakan data Indeks Pembangunan Manusia (IPM) pada Provinsi DKI Jakarta dari tahun 2011-2021.

Eksplorasi Data

x <- dataregresi$TAHUN
y <- dataregresi$IPM

##diagram pencar identifikasi model
plot(x,y,pch = 20, col = "blue", main = "Scatter Plot X vs Y",
     ylab = "Tahun", xlab = "Nilai IPM Provinsi DKI Jakarta")

#korelasi x dan y
cor(x,y)
## [1] 0.9879386

Terlihat bahwa keduanya memiliki hubungan kuat dan positif. Didukung dengan nilai korelasi yang dimiliki sangat tinggi, yaitu 0.987.

Model Regresi

model <- lm(y~x, data = dataregresi)
summary(model)
## 
## Call:
## lm(formula = y ~ x, data = dataregresi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42154 -0.16017  0.05061  0.16141  0.30594 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -816.54150   44.39144  -18.39 4.86e-09 ***
## x              0.44437    0.02202   20.18 1.97e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2634 on 10 degrees of freedom
## Multiple R-squared:  0.976,  Adjusted R-squared:  0.9736 
## F-statistic: 407.1 on 1 and 10 DF,  p-value: 1.97e-09

Berdasarkan output di atas, diperoleh model regresi linear y = -816.54+0.44x

Pendeteksian Autokorelasi

Eksploratif

1. Plot Residual

#sisaan dan fitted value
resi1 <- residuals(model)
fit <- predict(model)

#Diagnostik dengan eksploratif
par(mfrow = c(2,2))

qqnorm(resi1)
qqline(resi1, col = "steelblue", lwd = 2)

plot(fit, resi1, col = "steelblue", pch = 20, xlab = "Sisaan", 
     ylab = "Fitted Values", main = "Sisaan vs Fitted Values")
abline(a = 0, b = 0, lwd = 2)

hist(resi1, col = "steelblue")

plot(seq(1,12,1), resi1, col = "steelblue", pch = 20, 
     xlab = "Sisaan", ylab = "Order", main = "Sisaan vs Order")
lines(seq(1,12,1), resi1, col = "red")
abline(a = 0, b = 0, lwd = 2)

Pada grafik di atas, terlihat bahwa data sudah menyebar normal, ragam cenderung homogen, data menjulur ke kiri, dan diduga terdapat autokorelasi.

2. Plot ACF & PACF

par(mfrow = c(1,2))
acf(resi1)
pacf(resi1)

Pada plot tersebut terdapat garis vertikal yang melewati garis horizontal yang berwarna biru sehingga dapat diduga terdapat autokorelasi.

Uji Formal

3. Durbin Watson Test

Hipotesis H0: Tidak ada autokorelasi H1: Ada autokorelasi

lmtest::dwtest(model, alternative = 'two.sided') #ada autokorelasi
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 0.53015, p-value = 0.0001284
## alternative hypothesis: true autocorrelation is not 0

Berdasarkan output uji di atas, didapatkan p−value=0.0001284 < (5%) sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa terdapat autokorelasi.

4. Breusch-Godfrey Test

Hipotesis H0: Tidak ada autokorelasi H1: Ada autokorelasi

lmtest::bgtest(y ~ x, data=dataregresi, order=1) #Perform Breusch-Godfrey test for first-order serial correlation
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  y ~ x
## LM test = 5.1467, df = 1, p-value = 0.02329

Berdasarkan output uji di atas, didapatkan p−value = 0.02 < (5%) sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa terdapat autokorelasi.

Penanganan Autokorelasi

1. Cochrane-Orcutt

modelco <- cochrane.orcutt(model, convergence = 4, max.iter = 1000)
modelco
## Cochrane-orcutt estimation for first order autocorrelation 
##  
## Call:
## lm(formula = y ~ x, data = dataregresi)
## 
##  number of interaction: 185
##  rho 0.882051
## 
## Durbin-Watson statistic 
## (original):    0.53015 , p-value: 6.42e-05
## (transformed): 1.85597 , p-value: 2.592e-01
##  
##  coefficients: 
## (Intercept)           x 
## -156.599042    0.118213
#rho optimum
rho <- modelco$rho
rho
## [1] 0.8820513

Berdasarkan hasil di atas, didapatkan nilai rho optimum sebesar 0.882.

rho <- modelco$rho
y
##  [1] 76.31 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77 81.11
y[-1]
##  [1] 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77 81.11
y[-12]
##  [1] 76.31 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77
(y.trans <- y[-1]-y[-12]*rho)
##  [1] 9.670666 9.629692 9.694564 9.519435 9.845999 9.926769 9.848717 9.852974
##  [9] 9.781333 9.535538 9.866717
(x.trans <- x[-1]-x[-12]*rho)
##  [1] 238.0769 238.1949 238.3128 238.4308 238.5487 238.6667 238.7846 238.9026
##  [9] 239.0205 239.1384 239.2564
#model baru
modelcorho <- lm(y.trans~x.trans)
summary(modelcorho) 
## 
## Call:
## lm(formula = y.trans ~ x.trans)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.263180 -0.032017 -0.002564  0.086985  0.183823 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.4707    27.0164  -0.684    0.511
## x.trans       0.1182     0.1132   1.044    0.324
## 
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared:  0.1081, Adjusted R-squared:  0.008978 
## F-statistic: 1.091 on 1 and 9 DF,  p-value: 0.3236

Pengecekan Ulang Autokorelasi

lmtest::dwtest(modelcorho,alternative = 'two.sided') 
## 
##  Durbin-Watson test
## 
## data:  modelcorho
## DW = 1.856, p-value = 0.5185
## alternative hypothesis: true autocorrelation is not 0

Setelah dilakukan penanganan Cochrane-Orcutt, didapatkan p-value = 0.5185 > 0.05 sehingga tak tolak H0, artinya tidak cukup bukti untuk menyatakan bahwa terdapat autokorelasi pada taraf 5%.

# Hildreth-Lu
hildreth.lu.func<- function(r, model){
  x <- model.matrix(model)[,-1]
  y <- model.response(model.frame(model))
  n <- length(y)
  t <- 2:n
  y <- y[t]-r*y[t-1]
  x <- x[t]-r*x[t-1]
  
  return(lm(y~x))
}

#mencari rho yang meminimumkan SSE (iteratif)
r <- c(seq(0.1,0.8, by= 0.1), seq(0.9,0.99, by= 0.01))
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
round(tab, 4) #0,40 memiliki SSE Terkecil
##     rho    SSE
## 1  0.10 0.4607
## 2  0.20 0.3942
## 3  0.30 0.3366
## 4  0.40 0.2878
## 5  0.50 0.2478
## 6  0.60 0.2167
## 7  0.70 0.1944
## 8  0.80 0.1809
## 9  0.90 0.1763
## 10 0.91 0.1763
## 11 0.92 0.1764
## 12 0.93 0.1766
## 13 0.94 0.1769
## 14 0.95 0.1773
## 15 0.96 0.1778
## 16 0.97 0.1783
## 17 0.98 0.1790
## 18 0.99 0.1797
#grafik rho dan SSE
plot(tab$SSE ~ tab$rho , type = "l")
abline(v = tab[tab$SSE==min(tab$SSE),"rho"], lty = 3)

#mencari rho yang meminimumkan SSE (iteratif)
r <- c(seq(0.1,0.999, by= 0.001))
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
tab$rho[which.min(tab$SSE)]#rho optimal
## [1] 0.902

Terlihat bahwa ρ optimum yang menghasilkan SSE terkecil berada di sekitar 0.90. Sehingga pada iterasi selanjutnya, selang akan diperkecil.

#rho optimal di sekitar 0.90
r <- seq(0.80, 0.95, by= 0.01)
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
round(tab, 4) #0.90 memiliki SSE Terkecil
##     rho    SSE
## 1  0.80 0.1809
## 2  0.81 0.1801
## 3  0.82 0.1793
## 4  0.83 0.1786
## 5  0.84 0.1780
## 6  0.85 0.1775
## 7  0.86 0.1771
## 8  0.87 0.1768
## 9  0.88 0.1765
## 10 0.89 0.1764
## 11 0.90 0.1763
## 12 0.91 0.1763
## 13 0.92 0.1764
## 14 0.93 0.1766
## 15 0.94 0.1769
## 16 0.95 0.1773
#grafik SSE optimum
plot(tab$SSE ~ tab$rho , type = "l")
abline(v = tab[tab$SSE==min(tab$SSE),"rho"], lty = 3)

Pada selang yang telah diperkecil, nilai ρ optimum yang menghasilkan SSE terkecil tetap sebesar 0.90.

# Model terbaik
modelhl <- hildreth.lu.func(0.90, model)
summary(modelhl)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.262945 -0.031786  0.002045  0.082941  0.182273 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.92123   27.02291  -0.108    0.916
## x            0.05555    0.13345   0.416    0.687
## 
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared:  0.01889,    Adjusted R-squared:  -0.09013 
## F-statistic: 0.1733 on 1 and 9 DF,  p-value: 0.687

Pengecekan Ulang Autokorelasi

# Deteksi autokorelasi
lmtest::dwtest(modelhl)
## 
##  Durbin-Watson test
## 
## data:  modelhl
## DW = 1.8932, p-value = 0.2811
## alternative hypothesis: true autocorrelation is greater than 0

Setelah dilakukan penanganan Hildreth Lu, didapatkan p-value = 0.2811 > 0.05 sehingga tak tolak H0, artinya tidak cukup bukti untuk menyatakan bahwa terdapat autokorelasi pada taraf 5%. Dengan demikian, penanganan berhasil dilakukan

# Transformasi Balik
cat("y = ", coef(modelhl)[1]/(1-0.90), "+", coef(modelhl)[2],"x", sep = "") #persamaan regresi setelah di transformasi ke persamaan awal
## y = -29.21227+0.05554545x
#fungsi Hildreth Lu dengan library HoRM
modelhl2 <- HoRM::hildreth.lu(y, x, 0.90) #fungsi hildreth lu dengan rho tertentu
summary(modelhl2)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.262945 -0.031786  0.002045  0.082941  0.182273 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.92123   27.02291  -0.108    0.916
## x            0.05555    0.13345   0.416    0.687
## 
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared:  0.01889,    Adjusted R-squared:  -0.09013 
## F-statistic: 0.1733 on 1 and 9 DF,  p-value: 0.687