Resampling

Bootstrap

# Sample data: ten paired observations of distance (jarak) and emission (emisi).
jarak <- c(31, 38, 48, 52, 63, 67, 75, 84, 89, 99)
emisi <- c(553, 590, 608, 682, 752, 725, 834, 752, 845, 960)
data <- data.frame(jarak = jarak, emisi = emisi)

# Sample correlation between emisi and jarak, read from the correlation
# matrix of the data frame by row/column name.
korelasi <- cor(data)["emisi", "jarak"]
korelasi
## [1] 0.9504378

Penduga rata-rata bagi korelasi

# Bootstrap estimate of the correlation between jarak and emisi.
n <- nrow(data) # number of paired observations (rows)
b <- 5000       # number of bootstrap replicates

boot <- numeric(b)
for (i in seq_len(b)) {
  # Resample ROWS (paired observations) with replacement.
  # Calling sample() directly on a data frame draws from its columns, so every
  # replicate was either 1 (same column picked twice) or the original sample
  # correlation, which is not a bootstrap of the observations.
  idx <- sample.int(n, size = n, replace = TRUE)
  boot[i] <- cor(data[idx, ])[2, 1]
}
mean(boot) # bootstrap mean of the correlation
## (output varies with the random draw; the earlier printout 0.9755163 was an
##  artifact of resampling columns instead of rows)
# NOTE: any previously knitted output that depends on `boot` (the printed mean
# and the 2.5% / 97.5% quantiles) came from the column-resampling version and
# has to be regenerated.

Selang Kepercayaan 95% bagi korelasi

# 95% percentile confidence interval for the correlation:
# the 2.5% and 97.5% quantiles of the bootstrap replicates.
quantile(boot, probs = c(0.025, 0.975))
##      2.5%     97.5% 
## 0.9504378 1.0000000

Pendugaan galat prediksi model dengan validasi silang

Visualisasi Hubungan Jarak dan Emisi

# Draw the four candidate fits in a 2 x 2 grid of panels.
par(mfrow = c(2, 2))

# Grid of jarak values at which each fitted curve is evaluated.
a <- seq(from = 30, to = 100, by = 10)

# Linear model: emisi ~ jarak
L1 <- lm(emisi ~ jarak)
plot(jarak, emisi, main = "Linear", pch = 16)
yhat1 <- coef(L1)[[1]] + coef(L1)[[2]] * a
lines(a, yhat1, lwd = 2)

# Quadratic model: emisi ~ jarak + jarak^2
L2 <- lm(emisi ~ jarak + I(jarak^2))
plot(jarak, emisi, main = "Quadratic", pch = 16)
yhat2 <- coef(L2)[[1]] + coef(L2)[[2]] * a + coef(L2)[[3]] * a^2
lines(a, yhat2, lwd = 2)

# Exponential model: log(emisi) ~ jarak, back-transformed with exp() so the
# curve is drawn on the original emisi scale.
L3 <- lm(log(emisi) ~ jarak)
plot(jarak, emisi, main = "Exponential", pch = 16)
logyhat3 <- coef(L3)[[1]] + coef(L3)[[2]] * a
yhat3 <- exp(logyhat3)
lines(a, yhat3, lwd = 2)

# Log-log model: log(emisi) ~ log(jarak); this panel is drawn on the log scale
# of both axes, so the fit is plotted without back-transformation.
L4 <- lm(log(emisi) ~ log(jarak))
plot(log(jarak), log(emisi), main = "Log-Log", pch = 16)
logyhat4 <- coef(L4)[[1]] + coef(L4)[[2]] * log(a)
lines(log(a), logyhat4, lwd = 2)

Membangun 4 model (Linear, Kuadratik, Eksponensial, Log-Log) untuk Melihat Pengaruh Jarak terhadap Emisi

# Leave-one-out cross-validation of the four candidate models.
# For each observation k, every model is refitted on the remaining n - 1
# points and used to predict the held-out emisi[k]. The prediction error
# (observed minus predicted, on the original emisi scale) goes into e1..e4.
n <- length(emisi)
e1 <- e2 <- e3 <- e4 <- numeric(n)

# seq_len(n) rather than 1:n, so the loop body is skipped when n is 0
# instead of iterating over c(1, 0).
for (k in seq_len(n)) {
  # Training data: every observation except the k-th.
  y <- emisi[-k]
  x <- jarak[-k]

  # Linear model
  J1 <- lm(y ~ x)
  yhat1 <- coef(J1)[1] + coef(J1)[2] * jarak[k]
  e1[k] <- emisi[k] - yhat1

  # Quadratic model
  J2 <- lm(y ~ x + I(x^2))
  yhat2 <- coef(J2)[1] + coef(J2)[2] * jarak[k] + coef(J2)[3] * jarak[k]^2
  e2[k] <- emisi[k] - yhat2

  # Exponential model: fitted on log(y), prediction back-transformed with exp()
  J3 <- lm(log(y) ~ x)
  logyhat3 <- coef(J3)[1] + coef(J3)[2] * jarak[k]
  yhat3 <- exp(logyhat3)
  e3[k] <- emisi[k] - yhat3

  # Log-log model: fitted on log(y) ~ log(x), prediction back-transformed
  J4 <- lm(log(y) ~ log(x))
  logyhat4 <- coef(J4)[1] + coef(J4)[2] * log(jarak[k])
  yhat4 <- exp(logyhat4)
  e4[k] <- emisi[k] - yhat4
}

Perbandingan Galat Prediksi

# Summarise each model by its mean squared leave-one-out prediction error.
Model <- c("Linear", "Kuadratik", "Eksponensial", "Log-Log")
Galat_Prediksi <- vapply(
  list(e1, e2, e3, e4),
  function(err) mean(err^2),
  numeric(1)
)
df <- data.frame(Model = Model, Galat_Prediksi = Galat_Prediksi)
df

Berdasarkan perbandingan 4 model (Linear, Kuadratik, Eksponensial, Log-Log) di atas, maka didapatkan model terbaik untuk Melihat Pengaruh Jarak terhadap Emisi adalah Model Eksponensial karena memiliki nilai galat prediksi paling kecil yaitu sebesar 2086.847.

HAFIZAH ILMA (Statistician)