library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TTR)
## Warning: package 'TTR' was built under R version 4.1.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.1.3
library(lmtest) #uji-Durbin Watson
## Warning: package 'lmtest' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(orcutt) #Cochrane-Orcutt
## Warning: package 'orcutt' was built under R version 4.1.3
library(HoRM) #Hildreth Lu
## Warning: package 'HoRM' was built under R version 4.1.3
dataregresi <- read_excel("C:/Users/62812/Documents/SEMESTER 5/MPDW/data.xlsx", sheet="Sheet1")
head(dataregresi)
## # A tibble: 6 x 2
## TAHUN IPM
## <dbl> <dbl>
## 1 2010 76.3
## 2 2011 77.0
## 3 2012 77.5
## 4 2013 78.1
## 5 2014 78.4
## 6 2015 79.0
knitr::kable(dataregresi,align = "c")
| TAHUN | IPM |
|---|---|
| 2010 | 76.31 |
| 2011 | 76.98 |
| 2012 | 77.53 |
| 2013 | 78.08 |
| 2014 | 78.39 |
| 2015 | 78.99 |
| 2016 | 79.60 |
| 2017 | 80.06 |
| 2018 | 80.47 |
| 2019 | 80.76 |
| 2020 | 80.77 |
| 2021 | 81.11 |
Data di atas merupakan data Indeks Pembangunan Manusia (IPM) pada Provinsi DKI Jakarta dari tahun 2011-2021.
x <- dataregresi$TAHUN
y <- dataregresi$IPM
##diagram pencar identifikasi model
plot(x,y,pch = 20, col = "blue", main = "Scatter Plot X vs Y",
ylab = "Tahun", xlab = "Nilai IPM Provinsi DKI Jakarta")
#korelasi x dan y
cor(x,y)
## [1] 0.9879386
Terlihat bahwa keduanya memiliki hubungan kuat dan positif. Didukung dengan nilai korelasi yang dimiliki sangat tinggi, yaitu 0.987.
model <- lm(y~x, data = dataregresi)
summary(model)
##
## Call:
## lm(formula = y ~ x, data = dataregresi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42154 -0.16017 0.05061 0.16141 0.30594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -816.54150 44.39144 -18.39 4.86e-09 ***
## x 0.44437 0.02202 20.18 1.97e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2634 on 10 degrees of freedom
## Multiple R-squared: 0.976, Adjusted R-squared: 0.9736
## F-statistic: 407.1 on 1 and 10 DF, p-value: 1.97e-09
Berdasarkan output di atas, diperoleh model regresi linear y = -816.54+0.44x
#sisaan dan fitted value
resi1 <- residuals(model)
fit <- predict(model)
#Diagnostik dengan eksploratif
par(mfrow = c(2,2))
qqnorm(resi1)
qqline(resi1, col = "steelblue", lwd = 2)
plot(fit, resi1, col = "steelblue", pch = 20, xlab = "Sisaan",
ylab = "Fitted Values", main = "Sisaan vs Fitted Values")
abline(a = 0, b = 0, lwd = 2)
hist(resi1, col = "steelblue")
plot(seq(1,12,1), resi1, col = "steelblue", pch = 20,
xlab = "Sisaan", ylab = "Order", main = "Sisaan vs Order")
lines(seq(1,12,1), resi1, col = "red")
abline(a = 0, b = 0, lwd = 2)
Pada grafik di atas, terlihat bahwa data sudah menyebar normal, ragam cenderung homogen, data menjulur ke kiri, dan diduga terdapat autokorelasi.
par(mfrow = c(1,2))
acf(resi1)
pacf(resi1)
Pada plot tersebut terdapat garis vertikal yang melewati garis horizontal yang berwarna biru sehingga dapat diduga terdapat autokorelasi.
Hipotesis H0: Tidak ada autokorelasi H1: Ada autokorelasi
lmtest::dwtest(model, alternative = 'two.sided') #ada autokorelasi
##
## Durbin-Watson test
##
## data: model
## DW = 0.53015, p-value = 0.0001284
## alternative hypothesis: true autocorrelation is not 0
Berdasarkan output uji di atas, didapatkan p−value=0.0001284 < (5%) sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa terdapat autokorelasi.
Hipotesis H0: Tidak ada autokorelasi H1: Ada autokorelasi
lmtest::bgtest(y ~ x, data=dataregresi, order=1) #Perform Breusch-Godfrey test for first-order serial correlation
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: y ~ x
## LM test = 5.1467, df = 1, p-value = 0.02329
Berdasarkan output uji di atas, didapatkan p−value = 0.02 < (5%) sehingga tolak H0. Artinya, pada taraf nyata 5% cukup bukti untuk menyatakan bahwa terdapat autokorelasi.
modelco <- cochrane.orcutt(model, convergence = 4, max.iter = 1000)
modelco
## Cochrane-orcutt estimation for first order autocorrelation
##
## Call:
## lm(formula = y ~ x, data = dataregresi)
##
## number of interaction: 185
## rho 0.882051
##
## Durbin-Watson statistic
## (original): 0.53015 , p-value: 6.42e-05
## (transformed): 1.85597 , p-value: 2.592e-01
##
## coefficients:
## (Intercept) x
## -156.599042 0.118213
#rho optimum
rho <- modelco$rho
rho
## [1] 0.8820513
Berdasarkan hasil di atas, didapatkan nilai rho optimum sebesar 0.882.
rho <- modelco$rho
y
## [1] 76.31 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77 81.11
y[-1]
## [1] 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77 81.11
y[-12]
## [1] 76.31 76.98 77.53 78.08 78.39 78.99 79.60 80.06 80.47 80.76 80.77
(y.trans <- y[-1]-y[-12]*rho)
## [1] 9.670666 9.629692 9.694564 9.519435 9.845999 9.926769 9.848717 9.852974
## [9] 9.781333 9.535538 9.866717
(x.trans <- x[-1]-x[-12]*rho)
## [1] 238.0769 238.1949 238.3128 238.4308 238.5487 238.6667 238.7846 238.9026
## [9] 239.0205 239.1384 239.2564
#model baru
modelcorho <- lm(y.trans~x.trans)
summary(modelcorho)
##
## Call:
## lm(formula = y.trans ~ x.trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.263180 -0.032017 -0.002564 0.086985 0.183823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.4707 27.0164 -0.684 0.511
## x.trans 0.1182 0.1132 1.044 0.324
##
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared: 0.1081, Adjusted R-squared: 0.008978
## F-statistic: 1.091 on 1 and 9 DF, p-value: 0.3236
lmtest::dwtest(modelcorho,alternative = 'two.sided')
##
## Durbin-Watson test
##
## data: modelcorho
## DW = 1.856, p-value = 0.5185
## alternative hypothesis: true autocorrelation is not 0
Setelah dilakukan penanganan Cochrane-Orcutt, didapatkan p-value = 0.5185 > 0.05 sehingga tak tolak H0, artinya tidak cukup bukti untuk menyatakan bahwa terdapat autokorelasi pada taraf 5%.
# Hildreth-Lu
hildreth.lu.func<- function(r, model){
x <- model.matrix(model)[,-1]
y <- model.response(model.frame(model))
n <- length(y)
t <- 2:n
y <- y[t]-r*y[t-1]
x <- x[t]-r*x[t-1]
return(lm(y~x))
}
#mencari rho yang meminimumkan SSE (iteratif)
r <- c(seq(0.1,0.8, by= 0.1), seq(0.9,0.99, by= 0.01))
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
round(tab, 4) #0,40 memiliki SSE Terkecil
## rho SSE
## 1 0.10 0.4607
## 2 0.20 0.3942
## 3 0.30 0.3366
## 4 0.40 0.2878
## 5 0.50 0.2478
## 6 0.60 0.2167
## 7 0.70 0.1944
## 8 0.80 0.1809
## 9 0.90 0.1763
## 10 0.91 0.1763
## 11 0.92 0.1764
## 12 0.93 0.1766
## 13 0.94 0.1769
## 14 0.95 0.1773
## 15 0.96 0.1778
## 16 0.97 0.1783
## 17 0.98 0.1790
## 18 0.99 0.1797
#grafik rho dan SSE
plot(tab$SSE ~ tab$rho , type = "l")
abline(v = tab[tab$SSE==min(tab$SSE),"rho"], lty = 3)
#mencari rho yang meminimumkan SSE (iteratif)
r <- c(seq(0.1,0.999, by= 0.001))
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
tab$rho[which.min(tab$SSE)]#rho optimal
## [1] 0.902
Terlihat bahwa ρ optimum yang menghasilkan SSE terkecil berada di sekitar 0.90. Sehingga pada iterasi selanjutnya, selang akan diperkecil.
#rho optimal di sekitar 0.90
r <- seq(0.80, 0.95, by= 0.01)
tab <- data.frame("rho" = r, "SSE" = sapply(r, function(i){deviance(hildreth.lu.func(i, model))}))
round(tab, 4) #0.90 memiliki SSE Terkecil
## rho SSE
## 1 0.80 0.1809
## 2 0.81 0.1801
## 3 0.82 0.1793
## 4 0.83 0.1786
## 5 0.84 0.1780
## 6 0.85 0.1775
## 7 0.86 0.1771
## 8 0.87 0.1768
## 9 0.88 0.1765
## 10 0.89 0.1764
## 11 0.90 0.1763
## 12 0.91 0.1763
## 13 0.92 0.1764
## 14 0.93 0.1766
## 15 0.94 0.1769
## 16 0.95 0.1773
#grafik SSE optimum
plot(tab$SSE ~ tab$rho , type = "l")
abline(v = tab[tab$SSE==min(tab$SSE),"rho"], lty = 3)
Pada selang yang telah diperkecil, nilai ρ optimum yang menghasilkan SSE terkecil tetap sebesar 0.90.
# Model terbaik
modelhl <- hildreth.lu.func(0.90, model)
summary(modelhl)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.262945 -0.031786 0.002045 0.082941 0.182273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.92123 27.02291 -0.108 0.916
## x 0.05555 0.13345 0.416 0.687
##
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared: 0.01889, Adjusted R-squared: -0.09013
## F-statistic: 0.1733 on 1 and 9 DF, p-value: 0.687
# Deteksi autokorelasi
lmtest::dwtest(modelhl)
##
## Durbin-Watson test
##
## data: modelhl
## DW = 1.8932, p-value = 0.2811
## alternative hypothesis: true autocorrelation is greater than 0
Setelah dilakukan penanganan Hildreth Lu, didapatkan p-value = 0.2811 > 0.05 sehingga tak tolak H0, artinya tidak cukup bukti untuk menyatakan bahwa terdapat autokorelasi pada taraf 5%. Dengan demikian, penanganan berhasil dilakukan
# Transformasi Balik
cat("y = ", coef(modelhl)[1]/(1-0.90), "+", coef(modelhl)[2],"x", sep = "") #persamaan regresi setelah di transformasi ke persamaan awal
## y = -29.21227+0.05554545x
#fungsi Hildreth Lu dengan library HoRM
modelhl2 <- HoRM::hildreth.lu(y, x, 0.90) #fungsi hildreth lu dengan rho tertentu
summary(modelhl2)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.262945 -0.031786 0.002045 0.082941 0.182273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.92123 27.02291 -0.108 0.916
## x 0.05555 0.13345 0.416 0.687
##
## Residual standard error: 0.14 on 9 degrees of freedom
## Multiple R-squared: 0.01889, Adjusted R-squared: -0.09013
## F-statistic: 0.1733 on 1 and 9 DF, p-value: 0.687