Resampling
Bootstrap
# Ten paired observations: jarak (predictor) and emisi (response).
jarak <- c(31, 38, 48, 52, 63, 67, 75, 84, 89, 99)
emisi <- c(553, 590, 608, 682, 752, 725, 834, 752, 845, 960)

# Combine both vectors into one data frame; column order is jarak, emisi.
data <- data.frame(jarak = jarak, emisi = emisi)

# Observed Pearson correlation between emisi and jarak, read from the
# correlation matrix by column name rather than by position.
korelasi <- cor(data)["emisi", "jarak"]
korelasi
## [1] 0.9504378
Penduga rata-rata bagi korelasi
# Nonparametric bootstrap of the correlation between the two columns of
# `data`.
#
# Each replicate draws n row indices with replacement and recomputes the
# correlation on those rows, so every (jarak, emisi) pair stays intact.
# Passing the data frame itself to sample() would resample its columns
# (a data frame is a list of columns), making each replicate either exactly 1
# or the observed correlation.
n <- nrow(data) # number of observations
b <- 5000 # number of bootstrap replicates
boot <- numeric(b) # preallocated vector of replicate correlations
for (i in seq_len(b)) {
  idx <- sample.int(n, n, replace = TRUE)
  boot[i] <- cor(data[idx, ])[2, 1]
}
# Bootstrap estimate of the mean correlation. No seed is set, so the printed
# value changes from run to run.
mean(boot)
Selang Kepercayaan 95% bagi korelasi
# 95% percentile interval for the correlation: the 2.5% and 97.5% empirical
# quantiles of the bootstrap replicates stored in `boot`.
quantile(boot, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 0.9504378 1.0000000
# The two lines above are the output recorded when the document was rendered;
# it will differ whenever `boot` is regenerated.
# NOTE(review): the recorded endpoints are 0.9504378 and exactly 1, which
# looks degenerate for a bootstrap over observations - confirm how `boot`
# was produced.
Pendugaan galat prediksi model dengan validasi silang
Visualisasi Hubungan Jarak dan Emisi
# Scatter plots of the data with four candidate fits overlaid, arranged in a
# 2 x 2 panel layout: linear, quadratic, exponential and log-log.
old_par <- par(mfrow = c(2, 2)) # keep the previous layout so it can be restored
a <- seq(30, 100, 10) # grid of jarak values at which the fitted curves are drawn

# Linear model: emisi ~ jarak
L1 <- lm(emisi ~ jarak)
plot(jarak, emisi, main = "Linear", pch = 16)
yhat1 <- coef(L1)[1] + coef(L1)[2] * a
lines(a, yhat1, lwd = 2)

# Quadratic model: emisi ~ jarak + jarak^2
L2 <- lm(emisi ~ jarak + I(jarak^2))
plot(jarak, emisi, main = "Quadratic", pch = 16)
yhat2 <- coef(L2)[1] + coef(L2)[2] * a + coef(L2)[3] * a^2
lines(a, yhat2, lwd = 2)

# Exponential model: log(emisi) ~ jarak, back-transformed with exp() so the
# curve is drawn on the original emisi scale
L3 <- lm(log(emisi) ~ jarak)
plot(jarak, emisi, main = "Exponential", pch = 16)
logyhat3 <- coef(L3)[1] + coef(L3)[2] * a
yhat3 <- exp(logyhat3)
lines(a, yhat3, lwd = 2)

# Log-log model: log(emisi) ~ log(jarak); this panel is drawn on the log
# scale of both variables, so the fit is not back-transformed
L4 <- lm(log(emisi) ~ log(jarak))
plot(log(jarak), log(emisi), main = "Log-Log", pch = 16)
logyhat4 <- coef(L4)[1] + coef(L4)[2] * log(a)
lines(log(a), logyhat4, lwd = 2)

par(old_par) # restore the layout that was active before this block

# Build 4 models (linear, quadratic, exponential, log-log) to examine the
# effect of jarak on emisi
# Leave-one-out cross-validation of the four candidate models.
#
# For each observation k, every model is refitted on the remaining n - 1
# points and used to predict emisi[k]. The prediction errors are stored in
# e1 (linear), e2 (quadratic), e3 (exponential) and e4 (log-log). For the two
# models fitted to log(emisi), the prediction is back-transformed with exp()
# first, so all four errors are on the original emisi scale.
n <- length(emisi)
e1 <- e2 <- e3 <- e4 <- numeric(n)
for (k in seq_len(n)) {
  # training data: everything except observation k
  y <- emisi[-k]
  x <- jarak[-k]

  J1 <- lm(y ~ x) # linear model
  yhat1 <- coef(J1)[1] + coef(J1)[2] * jarak[k]
  e1[k] <- emisi[k] - yhat1

  J2 <- lm(y ~ x + I(x^2)) # quadratic model
  yhat2 <- coef(J2)[1] + coef(J2)[2] * jarak[k] + coef(J2)[3] * jarak[k]^2
  e2[k] <- emisi[k] - yhat2

  J3 <- lm(log(y) ~ x) # exponential model
  logyhat3 <- coef(J3)[1] + coef(J3)[2] * jarak[k]
  yhat3 <- exp(logyhat3)
  e3[k] <- emisi[k] - yhat3

  J4 <- lm(log(y) ~ log(x)) # log-log model
  logyhat4 <- coef(J4)[1] + coef(J4)[2] * log(jarak[k])
  yhat4 <- exp(logyhat4)
  e4[k] <- emisi[k] - yhat4
}
# Comparison of prediction errors
# Table of the mean squared prediction error of each model, computed from the
# error vectors e1..e4 (one entry per model, in the order listed in `Model`).
Model <- c("Linear", "Kuadratik", "Eksponensial", "Log-Log")
Galat_Prediksi <- c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2))
df <- data.frame(Model, Galat_Prediksi)
df
# Conclusion as recorded in the rendered document: based on the comparison of
# the 4 models (linear, quadratic, exponential, log-log) above, the best model
# for examining the effect of jarak on emisi is the exponential model, because
# it has the smallest prediction error, 2086.847.