Email: diandestin@gmail.com
Mata Kuliah: Spasial
Dosen Pengampu: Dr. I Gede Nyoman Mindra Jaya, M.Si
setwd("C:/Users/HP/Downloads")
# Jabar Shape
jabar <- read_sf("C:/Users/HP/Downloads/[geosai.my.id]Jawa_Barat_Kab/Jawa_Barat_ADMIN_BPS.shp")
jabar_shp <- read_sf("C:/Users/HP/Downloads/[geosai.my.id]Jawa_Barat_Kab/Jawa_Barat_ADMIN_BPS.shp") %>%
as_Spatial()
Data Curah Hujan (Total Precipitation) di Jawa Barat diperoleh dari ERA-5 dengan total 290 titik pada 10 Oktober 2024
# Climate data store
library(reticulate)
use_virtualenv("r-reticulate")
#py_install("cdsapi", envname = "r-reticulate")
cdsapi <- import('cdsapi')
server = cdsapi$Client()
query <- r_to_py(list(
variable= c('total_precipitation','2m_temperature'),
year= "2024",
month= "10",
day="10",
time= c("02:00","03:00","04:00","05:00","06:00","07:00","08:00"),
format= "netcdf",
area = "-5.918148/106.370304/-7.820979/108.846881" # North, West, South, East
))
server$retrieve("reanalysis-era5-land",
query,
"era5_tp_2024_10_10.nc")
## [1] "era5_tp_2024_10_10.nc"
nc <- nc_open("era5_tp_2024_10_10.nc")
lat <- ncvar_get(nc,'latitude')
lon <- ncvar_get(nc,'longitude')
dim(lat);dim(lon)
## [1] 20
## [1] 25
t2m <- ncvar_get(nc,"t2m")-273.15 # Change to Celcius
tp <- ncvar_get(nc, "tp")
time <- ncvar_get(nc, "valid_time")
ncatt_get(nc,'valid_time')
## $long_name
## [1] "time"
##
## $standard_name
## [1] "time"
##
## $units
## [1] "seconds since 1970-01-01"
##
## $calendar
## [1] "proleptic_gregorian"
timestamp <- as_datetime(c(time),origin="1970-01-01");timestamp
## [1] "2024-10-10 02:00:00 UTC" "2024-10-10 03:00:00 UTC"
## [3] "2024-10-10 04:00:00 UTC" "2024-10-10 05:00:00 UTC"
## [5] "2024-10-10 06:00:00 UTC" "2024-10-10 07:00:00 UTC"
## [7] "2024-10-10 08:00:00 UTC"
indices <- expand.grid(lon, lat, timestamp)
df <- data.frame(lon=indices$Var1,lat=indices$Var2,timestamp=indices$Var3,tp=as.vector(tp),t2m=as.vector(t2m))
df <- df %>%
group_by(lon,lat) %>%
slice(which.max(t2m))
spdf <- SpatialPointsDataFrame(df[,1:2], df[,3:4] , proj4string = CRS(proj4string(jabar_shp)))
spdf <- spdf[jabar_shp, ]
# Longitude dan Latitude titik pengamatan dan Jawa Barat
bbox(spdf)
## min max
## lon 106.47 108.771
## lat -7.72 -6.020
bbox(jabar_shp)
## min max
## x 106.370304 108.846881
## y -7.820979 -5.918148
spdf@data
## # A tibble: 290 × 2
## timestamp tp
## <dttm> <dbl>
## 1 2024-10-10 05:00:00 0.0000893
## 2 2024-10-10 05:00:00 0.000200
## 3 2024-10-10 06:00:00 0.000377
## 4 2024-10-10 06:00:00 0.000459
## 5 2024-10-10 06:00:00 0.000551
## 6 2024-10-10 06:00:00 0.000598
## 7 2024-10-10 06:00:00 0.000512
## 8 2024-10-10 06:00:00 0.000344
## 9 2024-10-10 06:00:00 0.000181
## 10 2024-10-10 05:00:00 0.000170
## # ℹ 280 more rows
nc_close(nc)
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:rcompanion':
##
## phi
## The following object is masked from 'package:raster':
##
## distance
## The following objects are masked from 'package:scales':
##
## alpha, rescale
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
summary(spdf$tp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 6.556e-05 1.747e-04 2.724e-04 3.434e-04 1.584e-03
## 70%:30% Random
set.seed(123)
TrainIndex1 <- sample(length(spdf), length(spdf)*7/10)
spdf_train_1 <- spdf[TrainIndex1,]
spdf_test_1 <- spdf[-TrainIndex1,]
colors_1 <- c("Train Data" = "blue", "Test Data" = "gray")
p1 <- ggplot() +
geom_sf(data=jabar) +
geom_point(data=as.data.frame(spdf_train_1),aes(x=lon,y=lat, color = "Train Data")) +
geom_point(data=as.data.frame(spdf_test_1),aes(x=lon,y=lat,color="Test Data")) +
scale_color_manual(values=colors_1) +
labs(color="", x="Longitude",y="Latitude") +
theme_bw()
p2 <- ggplot() +
geom_tile(data=as.data.frame(spdf), aes(x=lon,y=lat,fill=tp)) +
geom_sf(data=jabar, fill = NA) +
scale_fill_viridis_c(option = "C") +
labs(fill="tp", x="Longitude",y="Latitude") +
theme_bw()
grid.arrange(p2,p1,ncol=2)
# uji normalitas
Boxplot(tp, main = "Boxplot of Rainfall", ylab = "Rainfall")
## [1] 3289 3314 3264 3290 3339 3313 3315 3265 3288 3338
shapiro.test(tp)
##
## Shapiro-Wilk normality test
##
## data: tp
## W = 0.62303, p-value < 2.2e-16
plotNormalHistogram(tp, main = "Histogram of Rainfall",
xlab = "Rainfall", ylab = "Frequency")
Karena jumlah data besar, maka gunakan peendekatan Central Limit Theorem
\[ \hat{\gamma}(h) = \frac{1}{2N(h)} \sum_{i=1}^{N(h)} \left( z(x_i) - z(x_i + h) \right)^2 \]
Di mana:
- \(\hat{\gamma}(h)\) adalah nilai semivariogram pada lag \(h\),
- \(N(h)\) adalah jumlah pasangan titik dengan jarak \(h\),
- \(z(x_i)\) adalah nilai variabel pada titik \(x_i\),
- \(z(x_i + h)\) adalah nilai variabel pada titik dengan jarak \(h\) dari \(x_i\).
variogram_eks = variogram(tp~1, data = spdf_train_1, cloud = F)
ggplot(variogram_eks, aes(x = dist, y = gamma)) +
geom_point() +
labs(title = "Experimental Semivariogram",
x = "Distance",
y = "Semivariance") +
theme_minimal()
\[ \gamma(h) = \begin{cases} C_0 + C \left[ \frac{3h}{2a} - \frac{h^3}{2a^3} \right], & \text{untuk } 0 < h \leq a \\ C_0 + C, & \text{untuk } h > a \end{cases} \]
Di mana:
- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),
- \(C_0\) adalah nugget effect,
- \(C\) adalah sill (kontribusi variabilitas struktural),
- \(a\) adalah range (jarak maksimum di mana titik-titik masih memiliki hubungan).
sill = var(spdf_train_1$tp)
sph.model = vgm(psill = sill, model = "Sph", width = 0.001)
sph.fit = fit.variogram(object = variogram_eks, model = sph.model)
sph.fit
## model psill range
## 1 Sph 1.12627e-07 163.6319
plot(variogram_eks, pl = F, model = sph.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
main = "Theoretical Model - Spherical", xlab = "Distance (m)", ylab = "Semivariance")
\[ \gamma(h) = C_0 + C \left( 1 - e^{-\frac{h}{a}} \right) \]
Di mana:
- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),
- \(C_0\) adalah nugget effect,
- \(C\) adalah sill (kontribusi variabilitas struktural),
- \(a\) adalah range parameter (mengontrol tingkat kenaikan semivariogram; bukan range maksimum seperti pada model spherical).
exp.model = vgm(psill = sill, model = "Exp", width = 0.001)
exp.fit = fit.variogram(object = variogram_eks, model = exp.model)
## Warning in fit.variogram(object = variogram_eks, model = exp.model): No
## convergence after 200 iterations: try different initial values?
exp.fit
## model psill range
## 1 Exp 6.392365e-05 63727.12
plot(variogram_eks, pl = F, model = exp.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
main = "Theoretical Model - Exponential", xlab = "Distance (m)", ylab = "Semivariance")
\[ \gamma(h) = C_0 + C \left( 1 - e^{-\frac{h^2}{a^2}} \right) \]
Di mana:
- \(\gamma(h)\) adalah nilai semivariogram pada lag \(h\),
- \(C_0\) adalah nugget effect,
- \(C\) adalah sill (kontribusi variabilitas struktural),
- \(a\) adalah range parameter (mengontrol tingkat kenaikan semivariogram dengan lebih tajam mendekati sill).
gau.model = vgm(psill = sill, model = "Gau", width = 0.001)
gau.fit = fit.variogram(object = variogram_eks, model = gau.model)
gau.fit
## model psill range
## 1 Gau 7.260695e-08 39.44676
plot(variogram_eks, pl = F, model = gau.fit, col = "blue", cex = 1, lwd = 0.5, lty = 1, pch = 20,
main = "Theoretical Model - Gaussian", xlab = "Distance (m)", ylab = "Semivariance")
Rumus Sum-Square Error: \[ \text{SSE} = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 \]
Di mana:
- \(y_i\) adalah nilai observasi sebenarnya,
- \(\hat{y}_i\) adalah nilai prediksi model,
- \(n\) adalah jumlah data.
Model dengan nilai SSE terendah adalah model yang terbaik karena menunjukkan bahwa model tersebut memiliki kesalahan prediksi yang paling kecil.
# Assuming your_data is a SpatialPointsDataFrame object
distances <- spDists(coordinates(spdf_train_1))
max_distance <- max(distances)
max_distance
## [1] 2.506798
sph_var <- variogramLine(sph.fit, maxdist = 0.1)
exp_var <- variogramLine(exp.fit, maxdist = max_distance)
gau_var <- variogramLine(gau.fit, maxdist = 6)
attributes(sph.fit)$SSErr
## [1] 9.269167e-17
attributes(exp.fit)$SSErr
## [1] 7.012075e-17
attributes(gau.fit)$SSErr
## [1] 2.179346e-16
Berdasarkan Sum-Square Error terkecil yaitu Model Exponential.
# point kriging
interpolasi = krige(tp~1, spdf_train_1, spdf_test_1, model = exp.fit)
## [using ordinary kriging]
interpolasi_full = krige(tp~1, spdf_train_1, spdf, model = exp.fit)
## [using ordinary kriging]
data_aktual = spdf_test_1$tp
data_prediksi = interpolasi$var1.pred
hasil_prediksi = data.frame(data_aktual, data_prediksi)
hasil_prediksi
## data_aktual data_prediksi
## 1 1.998022e-04 1.935030e-04
## 2 3.765640e-04 3.698295e-04
## 3 3.438086e-04 3.347692e-04
## 4 2.838230e-04 2.772533e-04
## 5 3.422272e-04 3.574404e-04
## 6 3.728344e-04 3.809075e-04
## 7 2.277255e-04 2.595431e-04
## 8 2.911459e-04 2.905373e-04
## 9 3.640455e-04 3.050263e-04
## 10 3.398885e-05 1.004893e-04
## 11 2.553283e-04 2.318970e-04
## 12 3.341590e-04 2.010493e-04
## 13 3.487308e-04 2.810328e-04
## 14 2.193248e-04 2.506354e-04
## 15 2.165989e-04 2.204961e-04
## 16 3.574586e-04 2.769584e-04
## 17 1.312792e-04 1.293530e-04
## 18 8.593150e-05 9.823335e-05
## 19 1.411327e-04 1.249417e-04
## 20 1.668173e-04 2.025708e-04
## 21 2.529238e-04 3.119715e-04
## 22 3.948063e-05 4.780616e-05
## 23 9.840669e-05 8.955819e-05
## 24 1.848318e-04 1.825308e-04
## 25 1.477652e-04 1.565555e-04
## 26 1.177578e-04 2.819079e-04
## 27 4.065552e-04 3.766022e-04
## 28 9.125879e-05 1.122252e-04
## 29 6.286963e-05 5.842789e-05
## 30 3.269097e-04 3.223843e-04
## 31 3.587769e-05 3.787362e-05
## 32 1.636358e-04 1.579604e-04
## 33 8.633878e-05 8.902832e-05
## 34 2.155820e-04 1.731253e-04
## 35 5.644653e-05 6.931825e-05
## 36 1.975265e-05 2.665974e-05
## 37 7.245515e-05 7.322214e-05
## 38 3.284903e-05 4.860009e-05
## 39 7.915427e-06 5.039702e-06
## 40 5.838228e-06 3.723626e-06
## 41 2.831221e-06 1.510065e-06
## 42 1.710866e-04 1.449959e-04
## 43 1.208264e-04 1.012025e-04
## 44 1.726593e-04 1.730586e-04
## 45 2.068871e-04 1.711523e-04
## 46 1.553675e-04 1.464600e-04
## 47 8.923106e-05 8.187355e-05
## 48 2.464128e-05 2.869432e-05
## 49 2.088491e-07 -1.444543e-06
## 50 1.032211e-03 7.264993e-04
## 51 5.499988e-04 4.270226e-04
## 52 2.264351e-04 1.771272e-04
## 53 2.425630e-06 1.770207e-06
## 54 0.000000e+00 -2.223131e-06
## 55 1.122445e-03 8.482555e-04
## 56 9.016396e-04 6.846557e-04
## 57 1.796374e-04 9.662852e-05
## 58 2.425630e-06 2.998443e-07
## 59 0.000000e+00 -3.637263e-06
## 60 0.000000e+00 -5.118813e-06
## 61 6.356591e-05 9.860909e-05
## 62 7.430309e-04 7.415125e-04
## 63 6.928573e-04 6.455103e-04
## 64 5.198681e-04 4.921351e-04
## 65 2.562841e-04 2.668132e-04
## 66 4.897487e-06 6.868747e-06
## 67 9.370424e-04 9.198109e-04
## 68 9.385032e-04 8.406105e-04
## 69 4.305984e-04 4.014643e-04
## 70 1.606940e-04 1.549160e-04
## 71 1.356029e-05 6.447711e-06
## 72 3.660796e-06 3.270359e-06
## 73 3.247837e-05 8.783753e-06
## 74 8.601863e-04 8.531231e-04
## 75 8.022676e-04 8.146850e-04
## 76 8.737290e-04 8.542771e-04
## 77 7.301501e-04 6.944510e-04
## 78 1.508694e-04 1.794575e-04
## 79 3.469177e-07 3.963210e-06
## 80 7.529479e-04 7.228361e-04
## 81 6.001542e-04 5.988525e-04
## 82 3.478562e-04 3.692698e-04
## 83 1.102044e-04 1.596531e-04
## 84 2.235267e-05 4.022490e-05
## 85 6.347672e-04 6.111345e-04
## 86 3.511916e-04 3.678015e-04
## 87 5.134565e-04 4.529436e-04
Rumus untuk Weighted Mean Absolute Percentage Error (WMAPE) adalah:
\[ \text{WMAPE} = \frac{\sum_{i=1}^{n} |y_i - \hat{y}_i|}{\sum_{i=1}^{n} |y_i|} \times 100\% \]
Di mana:
- \(y_i\) adalah nilai observasi sebenarnya,
- \(\hat{y}_i\) adalah nilai prediksi model,
- \(n\) adalah jumlah data.
Rumus untuk Root Mean Squared Error (RMSE) adalah:
\[ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2} \]
Di mana:
- \(y_i\) adalah nilai observasi sebenarnya,
- \(\hat{y}_i\) adalah nilai prediksi model,
- \(n\) adalah jumlah data.
Data yang digunakan berskala sangat kecil mendekati nol, sehingga metriks evaluasi yang digunakan menggunakan WMAPE. Selain itu, RMSE juga digunakan untuk mengevaluasi.
# Fungsi untuk menghitung WMAPE
calculate_wmape <- function(actual, predicted) {
# Menghitung total kesalahan tertimbang
weighted_error <- sum(abs(actual - predicted))
# Menghitung total bobot untuk nilai aktual
total_weight <- sum(actual)
# Menghitung WMAPE
wmape <- (weighted_error / total_weight) * 100
return(wmape)
}
wmape = calculate_wmape(data_aktual, data_prediksi)
wmape
## [1] 11.76045
rmse = sqrt(mean((data_aktual - data_prediksi)^2))
rmse
## [1] 6.302382e-05
jabar_grid <- makegrid(jabar_shp, 20000)
jabar_pts <- SpatialPoints(jabar_grid, proj4string = CRS(proj4string(jabar_shp)))
jabar_x <- jabar_pts[jabar_shp, ]
interpolasi_full = krige(tp~1, spdf_train_1, jabar_x, model = exp.fit)
## [using ordinary kriging]
ggplot() +
geom_tile(data=as.data.frame(interpolasi_full), aes(x=x1,y=x2,fill=var1.pred)) +
geom_sf(data=jabar, fill = NA) +
scale_fill_viridis_c(option = "C")+
labs(fill="tp", x="Longitude",y="Latitude") +
theme_bw()