Library

library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(tmap)
library(viridis)
## Loading required package: viridisLite
library(dismo)
## Loading required package: raster
## Loading required package: sp
library(gstat)
library(sp)
library(mapview)
library(automap)
library(ggplot2)
library(caret)
## Loading required package: lattice

Data

# Load data bawaan 'meuse'
data(meuse)
coordinates(meuse) <- ~x + y
proj4string(meuse) <- CRS("+init=epsg:28992")
## Warning in CPL_crs_from_input(x): GDAL Message 1: +init=epsg:XXXX syntax is
## deprecated. It might return a CRS with a non-EPSG compliant axis order. Further
## messages of this type will be suppressed.
# Membuat grid untuk interpolasi
data(meuse.grid)
gridded(meuse.grid) <- ~x + y
proj4string(meuse.grid) <- CRS("+init=epsg:28992")

# Pilih variabel yang ingin diinterpolasi
target <- "zinc"

Dataset meuse berisi kadar logam berat (termasuk zinc) di tanah sekitar Sungai Meuse. Sistem koordinat dikonversi ke bentuk spasial agar dapat diinterpolasi, dan grid dibentuk sebagai titik tak terukur yang akan diprediksi.

Metode Determenistik

Closest Observation

Nilai pada lokasi yang tak terukur, diambil dari titik pengamatan terdekat. Metode ini tidak mempertimbangkan jarak lain, hanya titik terdekat.

co_model <- gstat(id = "zinc", formula = zinc ~ 1, data = meuse, set = list(idp = 0))
co_pred <- predict(co_model, meuse.grid)
## [inverse distance weighted interpolation]
spplot(co_pred, "zinc.pred", main = "Closest Observation")

Metode ini menugaskan nilai dari titik terdekat. Cocok untuk data diskrit, tetapi hasil peta terlihat kasar karena tidak memperhitungkan jarak relatif antar titik.

Nearest Neighbor (NN)

Mirip dengan Closest Observation, tapi mempertimbangkan hanya satu titik (nmax = 1). Cocok untuk estimasi kasar dan cepat.

nn_model <- gstat(formula = zinc ~ 1, data = meuse, nmax = 1)
nn_pred <- predict(nn_model, meuse.grid)
## [inverse distance weighted interpolation]
spplot(nn_pred, "var1.pred", main = "Nearest Neighbor")

Transisi nilai antar sel lebih jelas dibanding CO, tapi tetap tidak halus.

Inverse Distance Weighting (IDW)

Nilai prediksi dihitung berdasarkan rata-rata berbobot jarak (semakin dekat, semakin besar bobotnya). Dengan idp = 2, artinya bobot berkurang secara kuadrat terhadap jarak.

idw_model <- gstat(formula = zinc ~ 1, data = meuse, set = list(idp = 2))
idw_pred <- predict(idw_model, meuse.grid)
## [inverse distance weighted interpolation]
spplot(idw_pred, "var1.pred", main = "IDW (idp = 2)")

Peta menjadi lebih halus, namun tetap bersifat deterministik tanpa mempertimbangkan hubungan spasial global.

Ensemble (CO + NN + IDW)

Nilai akhir adalah rata-rata dari tiga metode deterministik.Tujuannya untuk menyeimbangkan kelemahan dan kekuatan masing-masing metode.

meuse.grid$ensemble <- (
co_pred$zinc.pred +
nn_pred$var1.pred +
idw_pred$var1.pred
) / 3

spplot(meuse.grid, "ensemble", main = "Ensemble (CO + NN + IDW)")

Model Stokastik (Kriging)

vc <- variogram(log(zinc) ~ 1, meuse, cloud = TRUE, cutoff = 1000)
plot(vc, main = "Variogram Cloud")

# Variogram Standar
v <- variogram(log(zinc) ~ 1, data = meuse)
plot(v, main = "Empirical Variogram")

# Menentukan model variogram awal (Spherical)
vinitial <- vgm(psill = 0.5, model = "Sph", range = 900, nugget = 0.1)
plot(v, vinitial, cutoff = 1000, cex = 1.5, main = "Initial Variogram Model")

# Fit variogram ke data
fit_vgm <- fit.variogram(v, vinitial)
plot(v, fit_vgm, main = "Fitted Variogram Model")

# Ordinary Kriging
krig_pred <- krige(log(zinc) ~ 1, meuse, meuse.grid, model = fit_vgm)
## [using ordinary kriging]
spplot(krig_pred, "var1.pred", main = "Ordinary Kriging (log Zinc)")

spplot(krig_pred, "var1.var", main = "Kriging Variance (log Zinc)")

- Log-transformasi digunakan karena kadar logam berat (zinc) cenderung tidak berdistribusi normal (skewed).

Evaluasi Model (Cross Validation & RMSE)

RMSE <- function(obs, pred) sqrt(mean((obs - pred)^2, na.rm = TRUE))

set.seed(100)
folds <- createFolds(meuse$zinc, k = 5)

rmse1 <- rmse2 <- rmse3 <- rmse4 <- rmse5 <- c()


for (k in 1:5) {
  test_idx <- folds[[k]]
  train <- meuse[-test_idx, ]
  test <- meuse[test_idx, ]
  
  # Closest Observation
  co_model <- gstat(formula = zinc ~ 1, data = train, set = list(idp = 0))
  co_pred <- predict(co_model, test)
  
  # IDW
  idw_model <- gstat(formula = zinc ~ 1, data = train, set = list(idp = 2))
  idw_pred <- predict(idw_model, test)
  
  # Nearest Neighbor
  nn_model <- gstat(formula = zinc ~ 1, data = train, nmax = 1)
  nn_pred <- predict(nn_model, test)
  
  # Ensemble (CO + IDW + NN)
  p_ensemble <- (co_pred$var1.pred + idw_pred$var1.pred + nn_pred$var1.pred) / 3
  
  # Kriging
  v <- variogram(log(zinc) ~ 1, data = train)
  fit_vgm <- fit.variogram(v, vgm("Sph"))
  krig_pred <- krige(log(zinc) ~ 1, train, test, model = fit_vgm)
  p_krig <- exp(krig_pred$var1.pred)  # balikkan log
  
  # Hitung RMSE untuk masing-masing metode
  rmse1[k] <- RMSE(test$zinc, co_pred$var1.pred)
  rmse2[k] <- RMSE(test$zinc, idw_pred$var1.pred)
  rmse3[k] <- RMSE(test$zinc, nn_pred$var1.pred)
  rmse4[k] <- RMSE(test$zinc, p_ensemble)
  rmse5[k] <- RMSE(test$zinc, p_krig)
}
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [using ordinary kriging]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [using ordinary kriging]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [using ordinary kriging]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [using ordinary kriging]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [inverse distance weighted interpolation]
## [using ordinary kriging]

Tabel RMSE tiap split

data.frame(
  Closest.Obs = rmse1,
  IDW = rmse2,
  NN = rmse3,
  Ensemble = rmse4,
  Kriging = rmse5
)
##   Closest.Obs      IDW       NN Ensemble  Kriging
## 1    298.2874 227.1256 346.6639 249.5528 174.5216
## 2    457.7136 335.5457 323.1782 327.7477 257.7893
## 3    388.6685 295.6630 367.1764 292.7788 265.8496
## 4    264.2431 212.1013 202.7831 202.3232 182.8575
## 5    385.0171 292.4093 326.6646 316.1424 220.0191

Rata-rata RMSE

data.frame(
  Closest.Obs = mean(rmse1),
  IDW = mean(rmse2),
  NN = mean(rmse3),
  Ensemble = mean(rmse4),
  Kriging = mean(rmse5)
)
##   Closest.Obs     IDW       NN Ensemble  Kriging
## 1    358.7859 272.569 313.2932  277.709 220.2074

Kesimpulan Akhir

Berdasarkan nilai RMSE dan pola spasial hasil interpolasi, metode Ordinary Kriging merupakan pendekatan terbaik untuk memetakan distribusi logam berat zinc di floodplain Sungai Meuse. Transformasi logaritmik berhasil menstabilkan varian data dan meningkatkan akurasi prediksi.

Hasil ini menunjukkan pentingnya mempertimbangkan struktur spasial dalam pemodelan data geostatistik, khususnya pada kasus pencemaran lingkungan di wilayah sungai.