Email: diandestin@gmail.com

Mata Kuliah: Spasial

Dosen Pengampu: Dr. I Gede Nyoman Mindra Jaya, M.Si

Data

Library

Shapefile Jawa Barat

setwd("C:/Users/HP/Downloads")
# Jabar Shape
jabar <- read_sf("C:/Users/HP/Downloads/[geosai.my.id]Jawa_Barat_Kab/Jawa_Barat_ADMIN_BPS.shp")

jabar_shp <- read_sf("C:/Users/HP/Downloads/[geosai.my.id]Jawa_Barat_Kab/Jawa_Barat_ADMIN_BPS.shp") %>%
  as_Spatial()

Dataset Curah Hujan

Data Curah Hujan (Total Precipitation) di Jawa Barat diperoleh dari ERA-5 dengan total 290 titik pada 10 Oktober 2024

# Climate data store
library(reticulate)
use_virtualenv("r-reticulate")
#py_install("cdsapi", envname = "r-reticulate")
cdsapi <- import('cdsapi')
server = cdsapi$Client()
query <- r_to_py(list(
  variable= c('total_precipitation','2m_temperature'),
  year= "2024",
  month= "10",
  day="10",
  time= c("02:00","03:00","04:00","05:00","06:00","07:00","08:00"),
  format= "netcdf",
  area = "-5.918148/106.370304/-7.820979/108.846881" # North, West, South, East
))

server$retrieve("reanalysis-era5-land",
                query,
                "era5_tp_2024_10_10.nc")
## [1] "era5_tp_2024_10_10.nc"
nc <- nc_open("era5_tp_2024_10_10.nc")

lat <- ncvar_get(nc,'latitude')
lon <- ncvar_get(nc,'longitude')
dim(lat);dim(lon)
## [1] 20
## [1] 25
t2m <- ncvar_get(nc,"t2m")-273.15 # Change to Celcius
tp <- ncvar_get(nc, "tp")

time <- ncvar_get(nc, "valid_time")

ncatt_get(nc,'valid_time')
## $long_name
## [1] "time"
## 
## $standard_name
## [1] "time"
## 
## $units
## [1] "seconds since 1970-01-01"
## 
## $calendar
## [1] "proleptic_gregorian"
timestamp <- as_datetime(c(time),origin="1970-01-01");timestamp
## [1] "2024-10-10 02:00:00 UTC" "2024-10-10 03:00:00 UTC"
## [3] "2024-10-10 04:00:00 UTC" "2024-10-10 05:00:00 UTC"
## [5] "2024-10-10 06:00:00 UTC" "2024-10-10 07:00:00 UTC"
## [7] "2024-10-10 08:00:00 UTC"
indices <- expand.grid(lon, lat, timestamp)

df <- data.frame(lon=indices$Var1,lat=indices$Var2,timestamp=indices$Var3,tp=as.vector(tp),t2m=as.vector(t2m))

df <- df %>%
  group_by(lon,lat) %>%
  slice(which.max(t2m))

spdf <- SpatialPointsDataFrame(df[,1:2], df[,3:4] , proj4string = CRS(proj4string(jabar_shp)))

spdf <- spdf[jabar_shp, ] 

# Longitude dan Latitude titik pengamatan dan Jawa Barat
bbox(spdf)
##        min     max
## lon 106.47 108.771
## lat  -7.72  -6.020
bbox(jabar_shp)
##          min        max
## x 106.370304 108.846881
## y  -7.820979  -5.918148
spdf@data
## # A tibble: 290 × 2
##    timestamp                  tp
##    <dttm>                  <dbl>
##  1 2024-10-10 05:00:00 0.0000893
##  2 2024-10-10 05:00:00 0.000200 
##  3 2024-10-10 06:00:00 0.000377 
##  4 2024-10-10 06:00:00 0.000459 
##  5 2024-10-10 06:00:00 0.000551 
##  6 2024-10-10 06:00:00 0.000598 
##  7 2024-10-10 06:00:00 0.000512 
##  8 2024-10-10 06:00:00 0.000344 
##  9 2024-10-10 06:00:00 0.000181 
## 10 2024-10-10 05:00:00 0.000170 
## # ℹ 280 more rows
nc_close(nc)
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:rcompanion':
## 
##     phi
## The following object is masked from 'package:raster':
## 
##     distance
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
summary(spdf$tp)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 6.556e-05 1.747e-04 2.724e-04 3.434e-04 1.584e-03

Split Data

## 70%:30% Random
set.seed(123)
TrainIndex1 <- sample(length(spdf), length(spdf)*7/10)
spdf_train_1 <- spdf[TrainIndex1,]
spdf_test_1 <- spdf[-TrainIndex1,]

colors_1 <- c("Train Data" = "blue", "Test Data" = "gray")

p1 <- ggplot() +
  geom_sf(data=jabar) +
  geom_point(data=as.data.frame(spdf_train_1),aes(x=lon,y=lat, color = "Train Data")) +
  geom_point(data=as.data.frame(spdf_test_1),aes(x=lon,y=lat,color="Test Data")) +
  scale_color_manual(values=colors_1) +
  labs(color="", x="Longitude",y="Latitude") +
  theme_bw()

p2 <- ggplot() +
  geom_tile(data=as.data.frame(spdf), aes(x=lon,y=lat,fill=tp)) +
  geom_sf(data=jabar, fill = NA) +
  scale_fill_viridis_c(option = "C") +
  labs(fill="tp", x="Longitude",y="Latitude") +
  theme_bw()

grid.arrange(p2,p1,ncol=2)

Asumsi

# uji normalitas
Boxplot(tp, main = "Boxplot of Rainfall", ylab = "Rainfall")

##  [1] 3289 3314 3264 3290 3339 3313 3315 3265 3288 3338
shapiro.test(tp)
## 
##  Shapiro-Wilk normality test
## 
## data:  tp
## W = 0.62303, p-value < 2.2e-16
plotNormalHistogram(tp, main = "Histogram of Rainfall", 
                    xlab = "Rainfall", ylab = "Frequency")

Karena jumlah data besar, maka gunakan peendekatan Central Limit Theorem

Semivariogram Eksperimental

\[ \hat{\gamma}(h) = \frac{1}{2N(h)} \sum_{i=1}^{N(h)} \left( z(x_i) - z(x_i + h) \right)^2 \]

Di mana:

- \(\hat{\gamma}(h)\) adalah nilai semivariogram pada lag \(h\),

- \(N(h)\) adalah jumlah pasangan titik dengan jarak \(h\),

- \(z(x_i)\) adalah nilai variabel pada titik \(x_i\),

- \(z(x_i + h)\) adalah nilai variabel pada titik dengan jarak \(h\) dari \(x_i\).

variogram_eks = variogram(tp~1, data = spdf_train_1, cloud = F)
ggplot(variogram_eks, aes(x = dist, y = gamma)) +
  geom_point() +
  labs(title = "Experimental Semivariogram",
       x = "Distance",
       y = "Semivariance") +
  theme_minimal()

Model Spherical

\[ \gamma(h) = \begin{cases} C_0 + C \left[ \frac{3h}{2a} - \frac{h^3}{2a^3} \right], & \text{untuk } 0 < h \leq a \\ C_0 + C, & \text{untuk } h > a \end{cases} \]

Di mana:

- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),

- \(C_0\) adalah nugget effect,

- \(C\) adalah sill (kontribusi variabilitas struktural),

- \(a\) adalah range (jarak maksimum di mana titik-titik masih memiliki hubungan).

sill = var(spdf_train_1$tp)
sph.model = vgm(psill = sill, model = "Sph", width = 0.001)
sph.fit = fit.variogram(object = variogram_eks, model = sph.model)
sph.fit
##   model       psill    range
## 1   Sph 1.12627e-07 163.6319
plot(variogram_eks, pl = F, model = sph.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
     main = "Theoretical Model - Spherical", xlab = "Distance (m)", ylab = "Semivariance")

Model Exponential

\[ \gamma(h) = C_0 + C \left( 1 - e^{-\frac{h}{a}} \right) \]

Di mana:

- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),

- \(C_0\) adalah nugget effect,

- \(C\) adalah sill (kontribusi variabilitas struktural),

- \(a\) adalah range parameter (mengontrol tingkat kenaikan semivariogram; bukan range maksimum seperti pada model spherical).

exp.model = vgm(psill = sill, model = "Exp", width = 0.001)
exp.fit = fit.variogram(object = variogram_eks, model = exp.model)
## Warning in fit.variogram(object = variogram_eks, model = exp.model): No
## convergence after 200 iterations: try different initial values?
exp.fit
##   model        psill    range
## 1   Exp 6.392365e-05 63727.12
plot(variogram_eks, pl = F, model = exp.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
     main = "Theoretical Model - Exponential", xlab = "Distance (m)", ylab = "Semivariance")

Model Gaussian

\[ \gamma(h) = C_0 + C \left( 1 - e^{-\frac{h^2}{a^2}} \right) \]

Di mana:

- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),

- \(C_0\) adalah nugget effect,

- \(C\) adalah sill (kontribusi variabilitas struktural),

- \(a\) adalah range parameter (mengontrol tingkat kenaikan semivariogram dengan lebih tajam mendekati sill).

gau.model = vgm(psill = sill, model = "Gau", width = 0.001)
gau.fit = fit.variogram(object = variogram_eks, model = gau.model)
gau.fit
##   model        psill    range
## 1   Gau 7.260695e-08 39.44676
plot(variogram_eks, pl = F, model = gau.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
     main = "Theoretical Model - Gaussian", xlab = "Distance (m)", ylab = "Semivariance")

Select Best Model

Rumus Sum-Square Error: \[ \text{SSE} = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 \]

Di mana:

- \(y_i\) adalah nilai observasi sebenarnya,

- \(\hat{y}_i\) adalah nilai prediksi model,

- \(n\) adalah jumlah data.

Model dengan nilai SSE terendah adalah model yang terbaik karena menunjukkan bahwa model tersebut memiliki kesalahan prediksi yang paling kecil.

# Assuming your_data is a SpatialPointsDataFrame object
distances <- spDists(coordinates(spdf_train_1))
max_distance <- max(distances)
max_distance
## [1] 2.506798
sph_var <- variogramLine(sph.fit, maxdist = 0.1)
exp_var <- variogramLine(exp.fit, maxdist = max_distance)
gau_var <- variogramLine(gau.fit, maxdist = 6)

attributes(sph.fit)$SSErr
## [1] 9.269167e-17
attributes(exp.fit)$SSErr
## [1] 7.012075e-17
attributes(gau.fit)$SSErr
## [1] 2.179346e-16

Berdasarkan Sum-Square Error terkecil yaitu Model Exponential.

Prediction

# point kriging
interpolasi = krige(tp~1, spdf_train_1, spdf_test_1, model = exp.fit)
## [using ordinary kriging]
interpolasi_full = krige(tp~1, spdf_train_1, spdf, model = exp.fit)
## [using ordinary kriging]
data_aktual = spdf_test_1$tp
data_prediksi = interpolasi$var1.pred
hasil_prediksi = data.frame(data_aktual, data_prediksi)
hasil_prediksi
##     data_aktual data_prediksi
## 1  1.998022e-04  1.935030e-04
## 2  3.765640e-04  3.698295e-04
## 3  3.438086e-04  3.347692e-04
## 4  2.838230e-04  2.772533e-04
## 5  3.422272e-04  3.574404e-04
## 6  3.728344e-04  3.809075e-04
## 7  2.277255e-04  2.595431e-04
## 8  2.911459e-04  2.905373e-04
## 9  3.640455e-04  3.050263e-04
## 10 3.398885e-05  1.004893e-04
## 11 2.553283e-04  2.318970e-04
## 12 3.341590e-04  2.010493e-04
## 13 3.487308e-04  2.810328e-04
## 14 2.193248e-04  2.506354e-04
## 15 2.165989e-04  2.204961e-04
## 16 3.574586e-04  2.769584e-04
## 17 1.312792e-04  1.293530e-04
## 18 8.593150e-05  9.823335e-05
## 19 1.411327e-04  1.249417e-04
## 20 1.668173e-04  2.025708e-04
## 21 2.529238e-04  3.119715e-04
## 22 3.948063e-05  4.780616e-05
## 23 9.840669e-05  8.955819e-05
## 24 1.848318e-04  1.825308e-04
## 25 1.477652e-04  1.565555e-04
## 26 1.177578e-04  2.819079e-04
## 27 4.065552e-04  3.766022e-04
## 28 9.125879e-05  1.122252e-04
## 29 6.286963e-05  5.842789e-05
## 30 3.269097e-04  3.223843e-04
## 31 3.587769e-05  3.787362e-05
## 32 1.636358e-04  1.579604e-04
## 33 8.633878e-05  8.902832e-05
## 34 2.155820e-04  1.731253e-04
## 35 5.644653e-05  6.931825e-05
## 36 1.975265e-05  2.665974e-05
## 37 7.245515e-05  7.322214e-05
## 38 3.284903e-05  4.860009e-05
## 39 7.915427e-06  5.039702e-06
## 40 5.838228e-06  3.723626e-06
## 41 2.831221e-06  1.510065e-06
## 42 1.710866e-04  1.449959e-04
## 43 1.208264e-04  1.012025e-04
## 44 1.726593e-04  1.730586e-04
## 45 2.068871e-04  1.711523e-04
## 46 1.553675e-04  1.464600e-04
## 47 8.923106e-05  8.187355e-05
## 48 2.464128e-05  2.869432e-05
## 49 2.088491e-07 -1.444543e-06
## 50 1.032211e-03  7.264993e-04
## 51 5.499988e-04  4.270226e-04
## 52 2.264351e-04  1.771272e-04
## 53 2.425630e-06  1.770207e-06
## 54 0.000000e+00 -2.223131e-06
## 55 1.122445e-03  8.482555e-04
## 56 9.016396e-04  6.846557e-04
## 57 1.796374e-04  9.662852e-05
## 58 2.425630e-06  2.998443e-07
## 59 0.000000e+00 -3.637263e-06
## 60 0.000000e+00 -5.118813e-06
## 61 6.356591e-05  9.860909e-05
## 62 7.430309e-04  7.415125e-04
## 63 6.928573e-04  6.455103e-04
## 64 5.198681e-04  4.921351e-04
## 65 2.562841e-04  2.668132e-04
## 66 4.897487e-06  6.868747e-06
## 67 9.370424e-04  9.198109e-04
## 68 9.385032e-04  8.406105e-04
## 69 4.305984e-04  4.014643e-04
## 70 1.606940e-04  1.549160e-04
## 71 1.356029e-05  6.447711e-06
## 72 3.660796e-06  3.270359e-06
## 73 3.247837e-05  8.783753e-06
## 74 8.601863e-04  8.531231e-04
## 75 8.022676e-04  8.146850e-04
## 76 8.737290e-04  8.542771e-04
## 77 7.301501e-04  6.944510e-04
## 78 1.508694e-04  1.794575e-04
## 79 3.469177e-07  3.963210e-06
## 80 7.529479e-04  7.228361e-04
## 81 6.001542e-04  5.988525e-04
## 82 3.478562e-04  3.692698e-04
## 83 1.102044e-04  1.596531e-04
## 84 2.235267e-05  4.022490e-05
## 85 6.347672e-04  6.111345e-04
## 86 3.511916e-04  3.678015e-04
## 87 5.134565e-04  4.529436e-04

Evaluation Metrics

WMAPE and RMSE

Rumus untuk Weighted Mean Absolute Percentage Error (WMAPE) adalah:

\[ \text{WMAPE} = \frac{\sum_{i=1}^{n} |y_i - \hat{y}_i|}{\sum_{i=1}^{n} |y_i|} \times 100\% \]

Di mana:

- \(y_i\) adalah nilai observasi sebenarnya,

- \(\hat{y}_i\) adalah nilai prediksi model,

- \(n\) adalah jumlah data.

Rumus untuk Root Mean Squared Error (RMSE) adalah:

\[ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2} \]

Di mana:

- \(y_i\) adalah nilai observasi sebenarnya,

- \(\hat{y}_i\) adalah nilai prediksi model,

- \(n\) adalah jumlah data.

Data yang digunakan berskala sangat kecil mendekati nol, sehingga metriks evaluasi yang digunakan menggunakan WMAPE. Selain itu, RMSE juga digunakan untuk mengevaluasi.

# Fungsi untuk menghitung WMAPE
calculate_wmape <- function(actual, predicted) {
  # Menghitung total kesalahan tertimbang
  weighted_error <- sum(abs(actual - predicted))
  
  # Menghitung total bobot untuk nilai aktual
  total_weight <- sum(actual)
  
  # Menghitung WMAPE
  wmape <- (weighted_error / total_weight) * 100
  return(wmape)
}

wmape = calculate_wmape(data_aktual, data_prediksi)
wmape
## [1] 11.76045
rmse = sqrt(mean((data_aktual - data_prediksi)^2))
rmse
## [1] 6.302382e-05

Plot Prediksi

jabar_grid <- makegrid(jabar_shp, 20000)
jabar_pts <- SpatialPoints(jabar_grid, proj4string = CRS(proj4string(jabar_shp)))

jabar_x <- jabar_pts[jabar_shp, ]                 

interpolasi_full = krige(tp~1, spdf_train_1, jabar_x, model = exp.fit)
## [using ordinary kriging]
ggplot() +
  geom_tile(data=as.data.frame(interpolasi_full), aes(x=x1,y=x2,fill=var1.pred)) +
  geom_sf(data=jabar, fill = NA) +
  scale_fill_viridis_c(option = "C")+
  labs(fill="tp", x="Longitude",y="Latitude") +
  theme_bw()