# Library Bawaan & Manipulasi Data
library(readxl)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
library(forecast)
# Library Analisis Missing Value
library(naniar)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Library Visualisasi & Tabel Interaktif
library(ggplot2)
library(ggcorrplot)
library(DT)
# Input Data
# Read the meteorological workbook into a tibble and print its structure.
data_met <- read_xlsx(path = "D:\\deska_baru2\\Data Meteorologi.xlsx")
str(object = data_met)
## tibble [12,053 × 8] (S3: tbl_df/tbl/data.frame)
## $ Tanggal : POSIXct[1:12053], format: "1990-01-01" "1990-01-02" ...
## $ Tn (suhu minimum) : num [1:12053] 19 19 20 19 19 20 19 19 20 20 ...
## $ Tx ( suhu maksimum) : num [1:12053] 28.4 27 26.8 28.4 28.6 22 27.8 28.2 28.2 24.8 ...
## $ Tavg (rata rata suhu) : num [1:12053] 22.2 22.5 23.1 22.5 21.5 20.3 22.3 23.3 23.2 21.8 ...
## $ RH_avg(kelembaban) : num [1:12053] 86 83 85 85 86 94 84 72 79 87 ...
## $ RR (curah hujan ) : num [1:12053] 6 47 0 12 16.5 1 13 1.5 2 0 ...
## $ ss ( penyinaran) : num [1:12053] 4.1 0.4 6.2 4.3 NA 0.1 5.2 2.8 5.1 0.4 ...
## $ ff_avg (angin rata rata): num [1:12053] 0 0 1 2 2 1 1 2 2 2 ...
head(data_met)
## # A tibble: 6 × 8
## Tanggal `Tn (suhu minimum)` `Tx ( suhu maksimum)`
## <dttm> <dbl> <dbl>
## 1 1990-01-01 00:00:00 19 28.4
## 2 1990-01-02 00:00:00 19 27
## 3 1990-01-03 00:00:00 20 26.8
## 4 1990-01-04 00:00:00 19 28.4
## 5 1990-01-05 00:00:00 19 28.6
## 6 1990-01-06 00:00:00 20 22
## # ℹ 5 more variables: `Tavg (rata rata suhu)` <dbl>,
## # `RH_avg(kelembaban)` <dbl>, `RR (curah hujan )` <dbl>,
## # `ss ( penyinaran)` <dbl>, `ff_avg (angin rata rata)` <dbl>
# Rename columns: strip the parenthesised description (and any whitespace in
# front of it) from every column name, e.g. "Tn (suhu minimum)" -> "Tn".
data_met <- rename_with(
  data_met,
  function(nama) str_remove(nama, "\\s*\\(.*\\)")
)
str(data_met)
## tibble [12,053 × 8] (S3: tbl_df/tbl/data.frame)
## $ Tanggal: POSIXct[1:12053], format: "1990-01-01" "1990-01-02" ...
## $ Tn : num [1:12053] 19 19 20 19 19 20 19 19 20 20 ...
## $ Tx : num [1:12053] 28.4 27 26.8 28.4 28.6 22 27.8 28.2 28.2 24.8 ...
## $ Tavg : num [1:12053] 22.2 22.5 23.1 22.5 21.5 20.3 22.3 23.3 23.2 21.8 ...
## $ RH_avg : num [1:12053] 86 83 85 85 86 94 84 72 79 87 ...
## $ RR : num [1:12053] 6 47 0 12 16.5 1 13 1.5 2 0 ...
## $ ss : num [1:12053] 4.1 0.4 6.2 4.3 NA 0.1 5.2 2.8 5.1 0.4 ...
## $ ff_avg : num [1:12053] 0 0 1 2 2 1 1 2 2 2 ...
# Replace the sentinel code 8888 with NA.
# Only numeric columns are checked, so the POSIXct `Tanggal` column is never
# compared against the sentinel.
# NOTE(review): 8888 is presumably the station's "not measured" code - confirm
# against the data dictionary.
data_met <- data_met %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 8888)))
summary(data_met)
## Tanggal Tn Tx Tavg
## Min. :1990-01-01 Min. :13.00 Min. :18.80 Min. :18.40
## 1st Qu.:1998-04-02 1st Qu.:19.00 1st Qu.:28.10 1st Qu.:22.80
## Median :2006-07-02 Median :20.00 Median :29.00 Median :23.40
## Mean :2006-07-02 Mean :19.38 Mean :29.01 Mean :23.39
## 3rd Qu.:2014-10-01 3rd Qu.:20.00 3rd Qu.:30.00 3rd Qu.:24.00
## Max. :2022-12-31 Max. :29.00 Max. :36.00 Max. :28.90
## NAs :578 NAs :326 NAs :156
## RH_avg RR ss ff_avg
## Min. :42.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:73.00 1st Qu.: 0.000 1st Qu.: 3.300 1st Qu.: 1.000
## Median :79.00 Median : 0.800 Median : 5.100 Median : 2.000
## Mean :77.99 Mean : 6.738 Mean : 4.924 Mean : 1.736
## 3rd Qu.:84.00 3rd Qu.: 7.800 3rd Qu.: 6.800 3rd Qu.: 2.000
## Max. :96.00 Max. :160.000 Max. :10.000 Max. :15.000
## NAs :330 NAs :1342 NAs :492 NAs :114
# Missing values per year
# Derive the calendar year of each observation, then count the NA entries of
# RR, Tn and Tx within every year (result columns: RR_na, Tn_na, Tx_na).
data_met <- mutate(data_met, tahun = year(Tanggal))
missing_tahunan <- data_met %>%
  group_by(tahun) %>%
  summarise(
    across(c(RR, Tn, Tx), ~ sum(is.na(.x)), .names = "{.col}_na")
  )
library(DT)
# Interactive table of the yearly missing-value counts, 33 rows per page.
missing_tahunan %>%
  datatable(
    caption = 'Tabel Missing Value Tahunan',
    options = list(pageLength = 33)
  )
# Visualisation
# A. naniar: map of where the missing values sit in the data set.
# In the vis_miss() plot the dark (black) cells are missing (NA) and the
# grey cells are present.
vis_miss(data_met) +
labs(title = "Peta Sebaran Missing Value Data Meteorologi")
# B. VIM: show which variables tend to be missing together.
# Variables are sorted by their share of missing values; the two `ylab`
# entries label the proportion panel and the combination-pattern panel.
aggr(
  data_met,
  sortVars = TRUE,
  numbers = TRUE,
  labels = names(data_met),
  col = c('navyblue','red'),
  cex.axis = .7,
  gap = 3,
  ylab = c("Proporsi Missing Data", "Pola Kombinasi Kosong")
)
## Warning in plot.aggr(res, ...): not enough vertical space to display
## frequencies (too many combinations)
##
## Variables sorted by number of missings:
## Variable Count
## RR 0.111341575
## Tn 0.047954866
## ss 0.040819713
## RH_avg 0.027379076
## Tx 0.027047208
## Tavg 0.012942836
## ff_avg 0.009458226
## Tanggal 0.000000000
## tahun 0.000000000
# Little's MCAR test on columns 2-9 (every column except `Tanggal`).
# H0: values are Missing Completely At Random (MCAR)
# H1: values are not MCAR (they may be MAR or MNAR)
uji_mcar <- data_met[, 2:9] %>%
  mcar_test()
uji_mcar
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 2242. 203 0 41
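# Jika p.value > 0.05, H0 gagal ditolak: sampaikan di laporan bahwa data hilang murni acak (MCAR).
# Jika p.value <= 0.05 (seperti hasil di atas, p.value = 0), H0 ditolak: data TIDAK hilang secara MCAR (bisa MAR atau MNAR).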
# Correlation
# Keep columns 2-8 (this leaves out `Tanggal` and the derived `tahun`) and
# drop every row containing an NA. The NA removal is temporary and only
# applies to the correlation computation; `data_met` itself is untouched.
data_numeric <- na.omit(data_met[, 2:8])
matriks_korelasi <- cor(data_numeric)
# Visualisation
ggcorrplot(matriks_korelasi,
  method = "square",
  type = "lower", # show only the lower triangle of the matrix
  lab = TRUE, # print the correlation coefficients in the cells
  colors = c("red", "white", "blue"),
  title = "Matriks Korelasi Variabel Meteorologi Bandung")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
## Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# NOTE(review): this recomputes the correlation matrix from the same
# `data_numeric` that was already used before the plot; the call looks redundant.
matriks_korelasi = cor(data_numeric)
# Box plots (not histograms) of Tn, Tx and RR
# Reshape the three columns to long format: one row per (variable, value)
# pair, with the variable name in `Variabel` and the measurement in `Nilai`.
data_long = data_met %>%
select(Tn, Tx, RR) %>%
pivot_longer(cols = everything(), names_to = "Variabel", values_to = "Nilai")
# Visualisation: one horizontal box plot per variable, outliers drawn as red dots
ggplot(data_long, aes(x = Variabel, y = Nilai, fill = Variabel)) +
geom_boxplot(alpha = 0.7, outlier.colour = "red", outlier.shape = 16) +
coord_flip() +
theme_minimal() +
labs(title = "Distribusi Suhu (Tn, Tx) & Curah Hujan (RR)",
x = "Variabel Iklim",
y = "Nilai Pengukuran")
## Warning: Removed 2246 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Time-series line chart of rainfall (RR) against the observation date.
ggplot(data = data_met, mapping = aes(x = Tanggal, y = RR)) +
  geom_line(alpha = 0.8, color = "dodgerblue") +
  labs(
    title = "Tren Runtun Waktu Curah Hujan (RR) Kota Bandung 1990-2022",
    x = "Tahun",
    y = "Curah Hujan (mm)"
  ) +
  theme_minimal()
# Time-series line chart of mean temperature (Tavg) against the observation date.
ggplot(data = data_met, mapping = aes(x = Tanggal, y = Tavg)) +
  geom_line(alpha = 0.8, color = "firebrick") +
  labs(
    title = "Tren Runtun Waktu Suhu Rata-rata (Tavg) Kota Bandung 1990-2022",
    x = "Tahun",
    y = "Suhu Rata-rata (°C)"
  ) +
  theme_minimal()
# Ground truth: the complete cases of columns 2-9 (all columns but `Tanggal`).
# NOTE(review): column 9 is the derived `tahun` column, so it is part of the
# ground-truth table as well - confirm that this is intended.
gt_met <- data_met[, 2:9] %>%
  na.omit()
anyNA(gt_met) # sanity check: must be FALSE
## [1] FALSE
Duplikasi data dan hilangkan 5% nilai dalam data
# Scheme 1: copy the ground truth and blank out 5% of the rows of every column.
# Rows are drawn independently per column, so one row can lose several values.
set.seed(123)
uji_1 <- gt_met
n_hilang <- round(0.05 * nrow(uji_1)) # identical for every column
for (col in names(uji_1)) {
  # seq_len() stays correct even for a zero-row table (1:0 would be c(1, 0))
  idx <- sample(seq_len(nrow(uji_1)), n_hilang)
  uji_1[idx, col] <- NA
}
# Remember where values were removed and what the true values were
mask_1 <- is.na(uji_1)
nilai_asli_1 <- gt_met[mask_1]
Duplikasi data dan hilangkan 10% nilai dalam data
# Scheme 2: copy the ground truth and blank out 10% of the rows of every column.
# Rows are drawn independently per column, so one row can lose several values.
set.seed(123)
uji_2 <- gt_met
n_hilang <- round(0.1 * nrow(uji_2)) # identical for every column
for (col in names(uji_2)) {
  # seq_len() stays correct even for a zero-row table (1:0 would be c(1, 0))
  idx <- sample(seq_len(nrow(uji_2)), n_hilang)
  uji_2[idx, col] <- NA
}
# Remember where values were removed and what the true values were
mask_2 <- is.na(uji_2)
nilai_asli_2 <- gt_met[mask_2]
Duplikasi data dan hilangkan 15% nilai dalam data
# Scheme 3: copy the ground truth and blank out 15% of the rows of every column.
# Rows are drawn independently per column, so one row can lose several values.
set.seed(123)
uji_3 <- gt_met
n_hilang <- round(0.15 * nrow(uji_3)) # identical for every column
for (col in names(uji_3)) {
  # seq_len() stays correct even for a zero-row table (1:0 would be c(1, 0))
  idx <- sample(seq_len(nrow(uji_3)), n_hilang)
  uji_3[idx, col] <- NA
}
# Remember where values were removed and what the true values were
mask_3 <- is.na(uji_3)
nilai_asli_3 <- gt_met[mask_3]
Duplikasi data dan hilangkan 30% nilai dalam data
# Scheme 4: copy the ground truth and blank out 30% of the rows of every column.
# Rows are drawn independently per column, so one row can lose several values.
set.seed(123)
uji_4 <- gt_met
n_hilang <- round(0.30 * nrow(uji_4)) # identical for every column
for (col in names(uji_4)) {
  # seq_len() stays correct even for a zero-row table (1:0 would be c(1, 0))
  idx <- sample(seq_len(nrow(uji_4)), n_hilang)
  uji_4[idx, col] <- NA
}
# Remember where values were removed and what the true values were
mask_4 <- is.na(uji_4)
nilai_asli_4 <- gt_met[mask_4]
Duplikasi data dan hilangkan 40% nilai dalam data
# Scheme 5: copy the ground truth and blank out 40% of the rows of every column.
# Rows are drawn independently per column, so one row can lose several values.
set.seed(123)
uji_5 <- gt_met
n_hilang <- round(0.4 * nrow(uji_5)) # identical for every column
for (col in names(uji_5)) {
  # Sample from this scheme's own table (uji_5); seq_len() also stays correct
  # for a zero-row table, where 1:0 would yield c(1, 0).
  idx <- sample(seq_len(nrow(uji_5)), n_hilang)
  uji_5[idx, col] <- NA
}
# Remember where values were removed and what the true values were
mask_5 <- is.na(uji_5)
nilai_asli_5 <- gt_met[mask_5]
library(missForest)
##
## Attaching package: 'missForest'
## The following object is masked from 'package:VIM':
##
## nrmse
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ tibble 3.3.1
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Baseline (no transformation): impute every masked data set with missForest
# and keep the imputed table (`$ximp`) as a data frame.
# NOTE(review): unlike the transformed runs, these calls are not individually
# re-seeded; their result depends on the RNG state left by the masking step.
MF_1 <- as.data.frame(missForest(as.matrix(uji_1))$ximp)
MF_2 <- as.data.frame(missForest(as.matrix(uji_2))$ximp)
MF_3 <- as.data.frame(missForest(as.matrix(uji_3))$ximp)
MF_4 <- as.data.frame(missForest(as.matrix(uji_4))$ximp)
# Scheme 5 is stored differently: MF_5 holds the raw missForest result and
# MF_6 holds the imputed data frame.
MF_5 <- missForest(as.matrix(uji_5))
MF_6 <- as.data.frame(MF_5$ximp)
# Imputed values at the masked positions of each scheme
prediksi_mf_1 <- MF_1[mask_1]
prediksi_mf_2 <- MF_2[mask_2]
prediksi_mf_3 <- MF_3[mask_3]
prediksi_mf_4 <- MF_4[mask_4]
# Use the imputed data frame (MF_6); MF_5 is the missForest result list and
# cannot be indexed with the logical mask matrix.
prediksi_mf_5 <- MF_6[mask_5]
# Log variant
# A. Transform: replace RR by log1p(RR) in a copy of each masked data set
uji_log_1 <- uji_1
uji_log_1[["RR"]] <- log1p(uji_1[["RR"]])
uji_log_2 <- uji_2
uji_log_2[["RR"]] <- log1p(uji_2[["RR"]])
uji_log_3 <- uji_3
uji_log_3[["RR"]] <- log1p(uji_3[["RR"]])
uji_log_4 <- uji_4
uji_log_4[["RR"]] <- log1p(uji_4[["RR"]])
uji_log_5 <- uji_5
uji_log_5[["RR"]] <- log1p(uji_5[["RR"]])
# B. missForest imputation, re-seeded before every run
set.seed(123)
mf_log_1 <- missForest(as.matrix(uji_log_1))$ximp
set.seed(123)
mf_log_2 <- missForest(as.matrix(uji_log_2))$ximp
set.seed(123)
mf_log_3 <- missForest(as.matrix(uji_log_3))$ximp
set.seed(123)
mf_log_4 <- missForest(as.matrix(uji_log_4))$ximp
set.seed(123)
mf_log_5 <- missForest(as.matrix(uji_log_5))$ximp
# C. Back-transform with expm1() the imputed RR values at the masked rows
tebak_log_1 <- expm1(mf_log_1[mask_1[, "RR"], "RR"])
tebak_log_2 <- expm1(mf_log_2[mask_2[, "RR"], "RR"])
tebak_log_3 <- expm1(mf_log_3[mask_3[, "RR"], "RR"])
tebak_log_4 <- expm1(mf_log_4[mask_4[, "RR"], "RR"])
tebak_log_5 <- expm1(mf_log_5[mask_5[, "RR"], "RR"])
# Square-root variant
# A. Transform: replace RR by sqrt(RR) in a copy of each masked data set
uji_sqrt_1 <- uji_1
uji_sqrt_1[["RR"]] <- sqrt(uji_1[["RR"]])
uji_sqrt_2 <- uji_2
uji_sqrt_2[["RR"]] <- sqrt(uji_2[["RR"]])
uji_sqrt_3 <- uji_3
uji_sqrt_3[["RR"]] <- sqrt(uji_3[["RR"]])
uji_sqrt_4 <- uji_4
uji_sqrt_4[["RR"]] <- sqrt(uji_4[["RR"]])
uji_sqrt_5 <- uji_5
uji_sqrt_5[["RR"]] <- sqrt(uji_5[["RR"]])
# B. missForest imputation, re-seeded before every run
set.seed(123)
mf_sqrt_1 <- missForest(as.matrix(uji_sqrt_1))$ximp
set.seed(123)
mf_sqrt_2 <- missForest(as.matrix(uji_sqrt_2))$ximp
set.seed(123)
mf_sqrt_3 <- missForest(as.matrix(uji_sqrt_3))$ximp
set.seed(123)
mf_sqrt_4 <- missForest(as.matrix(uji_sqrt_4))$ximp
set.seed(123)
mf_sqrt_5 <- missForest(as.matrix(uji_sqrt_5))$ximp
# C. Back-transform by squaring the imputed RR values at the masked rows
tebak_sqrt_1 <- (mf_sqrt_1[mask_1[, "RR"], "RR"])^2
tebak_sqrt_2 <- (mf_sqrt_2[mask_2[, "RR"], "RR"])^2
tebak_sqrt_3 <- (mf_sqrt_3[mask_3[, "RR"], "RR"])^2
tebak_sqrt_4 <- (mf_sqrt_4[mask_4[, "RR"], "RR"])^2
tebak_sqrt_5 <- (mf_sqrt_5[mask_5[, "RR"], "RR"])^2
# Box-Cox variant
# A. Estimate lambda from the observed RR values shifted by +1, then replace
#    RR by BoxCox(RR + 1, lambda) in a copy of each masked data set
lam_1 <- BoxCox.lambda(na.omit(uji_1$RR) + 1)
uji_boxcox_1 <- uji_1
uji_boxcox_1$RR <- BoxCox(uji_1$RR + 1, lam_1)
lam_2 <- BoxCox.lambda(na.omit(uji_2$RR) + 1)
uji_boxcox_2 <- uji_2
uji_boxcox_2$RR <- BoxCox(uji_2$RR + 1, lam_2)
lam_3 <- BoxCox.lambda(na.omit(uji_3$RR) + 1)
uji_boxcox_3 <- uji_3
uji_boxcox_3$RR <- BoxCox(uji_3$RR + 1, lam_3)
lam_4 <- BoxCox.lambda(na.omit(uji_4$RR) + 1)
uji_boxcox_4 <- uji_4
uji_boxcox_4$RR <- BoxCox(uji_4$RR + 1, lam_4)
lam_5 <- BoxCox.lambda(na.omit(uji_5$RR) + 1)
uji_boxcox_5 <- uji_5
uji_boxcox_5$RR <- BoxCox(uji_5$RR + 1, lam_5)
# B. missForest imputation, re-seeded before every run
set.seed(123)
mf_boxcox_1 <- missForest(as.matrix(uji_boxcox_1))$ximp
set.seed(123)
mf_boxcox_2 <- missForest(as.matrix(uji_boxcox_2))$ximp
set.seed(123)
mf_boxcox_3 <- missForest(as.matrix(uji_boxcox_3))$ximp
set.seed(123)
mf_boxcox_4 <- missForest(as.matrix(uji_boxcox_4))$ximp
set.seed(123)
mf_boxcox_5 <- missForest(as.matrix(uji_boxcox_5))$ximp
# C. Back-transform: InvBoxCox(x, lambda) - 1 undoes the +1 shift
tebak_bx_1 <- InvBoxCox(mf_boxcox_1[mask_1[, "RR"], "RR"], lam_1) - 1
tebak_bx_2 <- InvBoxCox(mf_boxcox_2[mask_2[, "RR"], "RR"], lam_2) - 1
tebak_bx_3 <- InvBoxCox(mf_boxcox_3[mask_3[, "RR"], "RR"], lam_3) - 1
tebak_bx_4 <- InvBoxCox(mf_boxcox_4[mask_4[, "RR"], "RR"], lam_4) - 1
tebak_bx_5 <- InvBoxCox(mf_boxcox_5[mask_5[, "RR"], "RR"], lam_5) - 1
# Metric helpers. `asli` holds the true values and `tebakan` the imputed ones;
# pairs containing NA are ignored.

# Root mean squared error.
hitung_rmse <- function(asli, tebakan) {
  galat_kuadrat <- (asli - tebakan)^2
  sqrt(mean(galat_kuadrat, na.rm = TRUE))
}

# Mean absolute error.
hitung_mae <- function(asli, tebakan) {
  galat_mutlak <- abs(asli - tebakan)
  mean(galat_mutlak, na.rm = TRUE)
}

# RMSE divided by the supplied standard deviation `sd_asli`.
hitung_nrmse <- function(asli, tebakan, sd_asli) {
  rmse <- hitung_rmse(asli, tebakan)
  rmse / sd_asli
}

# Squared Pearson correlation between true and imputed values.
hitung_r2 <- function(asli, tebakan) {
  korelasi <- cor(asli, tebakan, use = "complete.obs")
  korelasi^2
}
# True RR values (from gt_met) at the rows where RR was masked in each scheme
jawaban_asli_1 <- gt_met[["RR"]][mask_1[, "RR"]]
jawaban_asli_2 <- gt_met[["RR"]][mask_2[, "RR"]]
jawaban_asli_3 <- gt_met[["RR"]][mask_3[, "RR"]]
jawaban_asli_4 <- gt_met[["RR"]][mask_4[, "RR"]]
jawaban_asli_5 <- gt_met[["RR"]][mask_5[, "RR"]]
# Standard deviation of those true values, one per scheme
sd_1 <- sd(jawaban_asli_1)
sd_2 <- sd(jawaban_asli_2)
sd_3 <- sd(jawaban_asli_3)
sd_4 <- sd(jawaban_asli_4)
sd_5 <- sd(jawaban_asli_5)
# Baseline (untransformed) RR imputations at the same rows.
# Scheme 5 reads from MF_6, the object that holds its imputed data frame.
tebak_biasa_1 <- MF_1[["RR"]][mask_1[, "RR"]]
tebak_biasa_2 <- MF_2[["RR"]][mask_2[, "RR"]]
tebak_biasa_3 <- MF_3[["RR"]][mask_3[, "RR"]]
tebak_biasa_4 <- MF_4[["RR"]][mask_4[, "RR"]]
tebak_biasa_5 <- MF_6[["RR"]][mask_5[, "RR"]]
# RR metrics for every transformation: each result is a length-5 vector with
# one entry per missingness scheme (1 to 5, in that order).
asli_per_skema <- list(
  jawaban_asli_1, jawaban_asli_2, jawaban_asli_3,
  jawaban_asli_4, jawaban_asli_5
)
sd_per_skema <- c(sd_1, sd_2, sd_3, sd_4, sd_5)

# Apply a two-argument metric (truth, prediction) scheme by scheme.
metrik_per_skema <- function(fungsi, tebakan_per_skema) {
  vapply(
    seq_along(asli_per_skema),
    function(i) fungsi(asli_per_skema[[i]], tebakan_per_skema[[i]]),
    numeric(1)
  )
}

# NRMSE additionally needs the SD of the scheme's true values.
nrmse_per_skema <- function(tebakan_per_skema) {
  vapply(
    seq_along(asli_per_skema),
    function(i) {
      hitung_nrmse(asli_per_skema[[i]], tebakan_per_skema[[i]], sd_per_skema[i])
    },
    numeric(1)
  )
}

# No transformation (baseline)
tebakan_biasa <- list(
  tebak_biasa_1, tebak_biasa_2, tebak_biasa_3, tebak_biasa_4, tebak_biasa_5
)
rmse_biasa <- metrik_per_skema(hitung_rmse, tebakan_biasa)
mae_biasa <- metrik_per_skema(hitung_mae, tebakan_biasa)
nrmse_biasa <- nrmse_per_skema(tebakan_biasa)
r2_biasa <- metrik_per_skema(hitung_r2, tebakan_biasa)

# Logarithm
tebakan_log <- list(
  tebak_log_1, tebak_log_2, tebak_log_3, tebak_log_4, tebak_log_5
)
rmse_log <- metrik_per_skema(hitung_rmse, tebakan_log)
mae_log <- metrik_per_skema(hitung_mae, tebakan_log)
nrmse_log <- nrmse_per_skema(tebakan_log)
r2_log <- metrik_per_skema(hitung_r2, tebakan_log)

# Square root
tebakan_sqrt <- list(
  tebak_sqrt_1, tebak_sqrt_2, tebak_sqrt_3, tebak_sqrt_4, tebak_sqrt_5
)
rmse_sqrt <- metrik_per_skema(hitung_rmse, tebakan_sqrt)
mae_sqrt <- metrik_per_skema(hitung_mae, tebakan_sqrt)
nrmse_sqrt <- nrmse_per_skema(tebakan_sqrt)
r2_sqrt <- metrik_per_skema(hitung_r2, tebakan_sqrt)

# Box-Cox
tebakan_bx <- list(
  tebak_bx_1, tebak_bx_2, tebak_bx_3, tebak_bx_4, tebak_bx_5
)
rmse_boxcox <- metrik_per_skema(hitung_rmse, tebakan_bx)
mae_boxcox <- metrik_per_skema(hitung_mae, tebakan_bx)
nrmse_boxcox <- nrmse_per_skema(tebakan_bx)
r2_boxcox <- metrik_per_skema(hitung_r2, tebakan_bx)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Labels of the five missingness schemes, in scheme order
skema_label = c("Skema 1 (5%)", "Skema 2 (10%)", "Skema 3 (15%)", "Skema 4 (30%)", "Skema 5 (40%)")
# Build the master data frame: 20 rows = 5 schemes x 4 transformations.
# Row order is scheme-major: each scheme label is repeated 4 times, and the
# 4 method labels cycle within every scheme.
# c(rbind(a, b, c, d)) stacks the four length-5 metric vectors as rows and
# flattens the matrix column by column, which interleaves them as
# a[1], b[1], c[1], d[1], a[2], ... - the same scheme-major order as the labels.
tabel_master = data.frame(
Skema_Missing = rep(skema_label, each = 4),
Metode_Transformasi = rep(c("1. Tanpa Transformasi", "2. Logaritma", "3. Akar Kuadrat", "4. Box-Cox"), times = 5),
SD_Aktual = rep(c(sd_1, sd_2, sd_3, sd_4, sd_5), each = 4),
RMSE = c(rbind(rmse_biasa, rmse_log, rmse_sqrt, rmse_boxcox)),
MAE = c(rbind(mae_biasa, mae_log, mae_sqrt, mae_boxcox)),
NRMSE_SD = c(rbind(nrmse_biasa, nrmse_log, nrmse_sqrt, nrmse_boxcox)),
R_Squared = c(rbind(r2_biasa, r2_log, r2_sqrt, r2_boxcox))
)
# Render table 1: absolute and relative error metrics together with the SD.
tabel_error <- tabel_master[
  ,
  c("Skema_Missing", "Metode_Transformasi", "SD_Aktual", "RMSE", "MAE", "NRMSE_SD")
]
kable(
  tabel_error,
  digits = 4,
  caption = "<b>Tabel 1: Evaluasi Metrik Error (RMSE, MAE, NRMSE) dan SD pada Berbagai Transformasi Curah Hujan</b>",
  align = "c",
  escape = FALSE,
  row.names = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  # merge repeated cells of the first three columns
  collapse_rows(columns = 1:3, valign = "top")
| Skema_Missing | Metode_Transformasi | SD_Aktual | RMSE | MAE | NRMSE_SD |
|---|---|---|---|---|---|
| Skema 1 (5%) | 1. Tanpa Transformasi | 11.4686 | 10.5507 | 6.2699 | 0.9200 |
| | 2. Logaritma | | 11.1929 | 5.3828 | 0.9760 |
| | 3. Akar Kuadrat | | 10.9304 | 5.4797 | 0.9531 |
| | 4. Box-Cox | | 11.4031 | 5.3808 | 0.9943 |
| Skema 2 (10%) | 1. Tanpa Transformasi | 11.7941 | 10.7316 | 6.5002 | 0.9099 |
| | 2. Logaritma | | 11.2402 | 5.3071 | 0.9530 |
| | 3. Akar Kuadrat | | 10.8893 | 5.4165 | 0.9233 |
| | 4. Box-Cox | | 11.4601 | 5.2527 | 0.9717 |
| Skema 3 (15%) | 1. Tanpa Transformasi | 12.4520 | 11.2911 | 6.6248 | 0.9068 |
| | 2. Logaritma | | 11.8780 | 5.6053 | 0.9539 |
| | 3. Akar Kuadrat | | 11.5262 | 5.6686 | 0.9256 |
| | 4. Box-Cox | | 12.1416 | 5.5821 | 0.9751 |
| Skema 4 (30%) | 1. Tanpa Transformasi | 12.3741 | 11.6104 | 7.0111 | 0.9383 |
| | 2. Logaritma | | 11.9421 | 5.8583 | 0.9651 |
| | 3. Akar Kuadrat | | 11.7155 | 6.0300 | 0.9468 |
| | 4. Box-Cox | | 12.1828 | 5.7987 | 0.9845 |
| Skema 5 (40%) | 1. Tanpa Transformasi | 12.1162 | 12.4815 | 7.8556 | 1.0302 |
| | 2. Logaritma | | 11.8622 | 6.2655 | 0.9790 |
| | 3. Akar Kuadrat | | 11.9414 | 6.6334 | 0.9856 |
| | 4. Box-Cox | | 12.0146 | 6.1125 | 0.9916 |
# Render table 2: pattern agreement (R-squared) per scheme and transformation.
tabel_rsquare <- tabel_master[
  ,
  c("Skema_Missing", "Metode_Transformasi", "R_Squared")
]
kable(
  tabel_rsquare,
  digits = 4,
  caption = "<b>Tabel 2: Evaluasi Kesesuaian Pola (R-Squared) pada Berbagai Transformasi Curah Hujan</b>",
  align = "c",
  escape = FALSE,
  row.names = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  # merge repeated scheme labels in the first column
  collapse_rows(columns = 1, valign = "top")
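| Skema_Missing | Metode_Transformasi | R_Squared |
|---|---|---|
| Skema 1 (5%) | 1. Tanpa Transformasi | 0.1634 |
| | 2. Logaritma | 0.1222 |
| | 3. Akar Kuadrat | 0.1284 |
| | 4. Box-Cox | 0.1174 |
| Skema 2 (10%) | 1. Tanpa Transformasi | 0.1806 |
| | 2. Logaritma | 0.1691 |
| | 3. Akar Kuadrat | 0.1771 |
| | 4. Box-Cox | 0.1787 |
| Skema 3 (15%) | 1. Tanpa Transformasi | 0.1798 |
| | 2. Logaritma | 0.1820 |
| | 3. Akar Kuadrat | 0.1835 |
| | 4. Box-Cox | 0.1830 |
| Skema 4 (30%) | 1. Tanpa Transformasi | 0.1468 |
| | 2. Logaritma | 0.1314 |
| | 3. Akar Kuadrat | 0.1350 |
| | 4. Box-Cox | 0.1246 |
| Skema 5 (40%) | 1. Tanpa Transformasi | 0.0944 |
| | 2. Logaritma | 0.0966 |
| | 3. Akar Kuadrat | 0.0957 |
| | 4. Box-Cox | 0.0922 |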
# Seluruh Variabel
# Compute the imputation metrics for every column of a data set.
#
# data_asli     : table with the true values
# data_uji      : masked copy of `data_asli`; its NA positions select the
#                 values that are evaluated
# data_imputasi : imputed table holding the predictions
# nama_skema    : label written into the `Skema` column of the result
#
# Returns one row per evaluated column (Skema, Variabel, SD_Aktual, RMSE, MAE,
# NRMSE, R_Squared), all metrics rounded to 4 decimals. The columns "Tanggal"
# and "tahun" are skipped, as are columns without any NA in `data_uji`.
evaluasi_semua_variabel <- function(data_asli, data_uji, data_imputasi, nama_skema) {
  kolom_dilewati <- c("Tanggal", "tahun")
  kolom_cuaca <- names(data_asli)[!names(data_asli) %in% kolom_dilewati]

  # Metrics of a single column, or NULL when nothing was masked in it
  evaluasi_satu_kolom <- function(kolom) {
    posisi_na <- is.na(data_uji[[kolom]])
    if (!any(posisi_na)) {
      return(NULL)
    }
    aktual <- data_asli[[kolom]][posisi_na]
    prediksi <- data_imputasi[[kolom]][posisi_na]
    sd_aktual <- sd(aktual, na.rm = TRUE)
    data.frame(
      Skema = nama_skema,
      Variabel = kolom,
      SD_Aktual = round(sd_aktual, 4),
      RMSE = round(hitung_rmse(aktual, prediksi), 4),
      MAE = round(hitung_mae(aktual, prediksi), 4),
      NRMSE = round(hitung_nrmse(aktual, prediksi, sd_aktual), 4),
      R_Squared = round(hitung_r2(aktual, prediksi), 4)
    )
  }

  # Stack the per-column rows into one table (NULL entries are dropped)
  bind_rows(lapply(kolom_cuaca, evaluasi_satu_kolom))
}
# Baseline (no transformation): per-variable metrics for each scheme.
# Scheme 5 uses MF_6, the object holding its imputed data frame.
eval_biasa_1 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_1,
  data_imputasi = MF_1, nama_skema = "Skema 1 (5%)"
)
eval_biasa_2 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_2,
  data_imputasi = MF_2, nama_skema = "Skema 2 (10%)"
)
eval_biasa_3 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_3,
  data_imputasi = MF_3, nama_skema = "Skema 3 (15%)"
)
eval_biasa_4 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_4,
  data_imputasi = MF_4, nama_skema = "Skema 4 (30%)"
)
eval_biasa_5 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_5,
  data_imputasi = MF_6, nama_skema = "Skema 5 (40%)"
)
# Stack the five schemes into one master table
tabel_pervar_biasa <- bind_rows(
  eval_biasa_1, eval_biasa_2, eval_biasa_3, eval_biasa_4, eval_biasa_5
)
# Render the baseline table
kable(
  tabel_pervar_biasa,
  caption = "<b>Tabel Evaluasi Per Variabel: Kinerja MissForest pada Data Asli (Tanpa Transformasi)</b>",
  align = "c",
  escape = FALSE,
  row.names = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  collapse_rows(columns = 1, valign = "top") %>%
  # green header row
  row_spec(0, bold = TRUE, color = "white", background = "#4CAF50")
| Skema | Variabel | SD_Aktual | RMSE | MAE | NRMSE | R_Squared |
|---|---|---|---|---|---|---|
| Skema 1 (5%) | Tn | 1.5779 | 0.9770 | 0.7382 | 0.6192 | 0.6187 |
| Tx | 1.5428 | 0.9791 | 0.6888 | 0.6346 | 0.5988 | |
| Tavg | 0.9581 | 0.5840 | 0.4302 | 0.6095 | 0.6290 | |
| RH_avg | 7.5312 | 4.0017 | 3.1217 | 0.5313 | 0.7173 | |
| RR | 11.4686 | 10.5507 | 6.2699 | 0.9200 | 0.1634 | |
| ss | 2.2035 | 1.6744 | 1.2730 | 0.7599 | 0.4352 | |
| ff_avg | 1.0859 | 0.9107 | 0.6898 | 0.8387 | 0.3008 | |
| Skema 2 (10%) | Tn | 1.5445 | 0.9767 | 0.7476 | 0.6324 | 0.6018 |
| Tx | 1.5484 | 0.9199 | 0.6955 | 0.5941 | 0.6517 | |
| Tavg | 0.8566 | 0.5448 | 0.4193 | 0.6360 | 0.5953 | |
| RH_avg | 7.6250 | 4.2300 | 3.2177 | 0.5548 | 0.6939 | |
| RR | 11.7941 | 10.7316 | 6.5002 | 0.9099 | 0.1806 | |
| ss | 2.3298 | 1.6733 | 1.2964 | 0.7182 | 0.4842 | |
| ff_avg | 1.1282 | 0.9654 | 0.7406 | 0.8557 | 0.2678 | |
| Skema 3 (15%) | Tn | 1.5124 | 0.9954 | 0.7703 | 0.6581 | 0.5669 |
| Tx | 1.4739 | 0.9446 | 0.7254 | 0.6409 | 0.5901 | |
| Tavg | 0.9250 | 0.5956 | 0.4508 | 0.6438 | 0.5853 | |
| RH_avg | 7.4837 | 4.3842 | 3.4063 | 0.5858 | 0.6566 | |
| RR | 12.4520 | 11.2911 | 6.6248 | 0.9068 | 0.1798 | |
| ss | 2.2611 | 1.6389 | 1.2865 | 0.7248 | 0.4746 | |
| ff_avg | 1.1101 | 0.9558 | 0.7359 | 0.8609 | 0.2590 | |
| Skema 4 (30%) | Tn | 1.5309 | 1.1481 | 0.8642 | 0.7499 | 0.4562 |
| Tx | 1.4971 | 1.0507 | 0.7738 | 0.7018 | 0.5133 | |
| Tavg | 0.8864 | 0.6670 | 0.5080 | 0.7525 | 0.4575 | |
| RH_avg | 7.5323 | 4.9666 | 3.8598 | 0.6594 | 0.5760 | |
| RR | 12.3741 | 11.6104 | 7.0111 | 0.9383 | 0.1468 | |
| ss | 2.3069 | 1.7568 | 1.3530 | 0.7616 | 0.4274 | |
| ff_avg | 1.1520 | 1.0370 | 0.7882 | 0.9002 | 0.1963 | |
| Skema 5 (40%) | Tn | 1.5266 | 1.2695 | 0.9504 | 0.8316 | 0.3572 |
| Tx | 1.4853 | 1.1375 | 0.8478 | 0.7658 | 0.4365 | |
| Tavg | 0.9220 | 0.7340 | 0.5583 | 0.7961 | 0.4037 | |
| RH_avg | 7.7043 | 5.4699 | 4.1614 | 0.7100 | 0.5131 | |
| RR | 12.1162 | 12.4815 | 7.8556 | 1.0302 | 0.0944 | |
| ss | 2.2859 | 1.8843 | 1.4592 | 0.8243 | 0.3487 | |
| ff_avg | 1.1357 | 1.0906 | 0.8324 | 0.9603 | 0.1238 |
# Log variant: per-variable metrics for each scheme.
# Work on data-frame copies of the imputed matrices and bring ONLY the RR
# column back to its original scale with expm1(); the mf_log_* objects stay
# untouched.
imputasi_log_1 <- as.data.frame(mf_log_1)
imputasi_log_1[["RR"]] <- expm1(imputasi_log_1[["RR"]])
imputasi_log_2 <- as.data.frame(mf_log_2)
imputasi_log_2[["RR"]] <- expm1(imputasi_log_2[["RR"]])
imputasi_log_3 <- as.data.frame(mf_log_3)
imputasi_log_3[["RR"]] <- expm1(imputasi_log_3[["RR"]])
imputasi_log_4 <- as.data.frame(mf_log_4)
imputasi_log_4[["RR"]] <- expm1(imputasi_log_4[["RR"]])
imputasi_log_5 <- as.data.frame(mf_log_5)
imputasi_log_5[["RR"]] <- expm1(imputasi_log_5[["RR"]])
# Metrics per scheme
eval_log_1 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_1,
  data_imputasi = imputasi_log_1, nama_skema = "Skema 1 (5%)"
)
eval_log_2 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_2,
  data_imputasi = imputasi_log_2, nama_skema = "Skema 2 (10%)"
)
eval_log_3 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_3,
  data_imputasi = imputasi_log_3, nama_skema = "Skema 3 (15%)"
)
eval_log_4 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_4,
  data_imputasi = imputasi_log_4, nama_skema = "Skema 4 (30%)"
)
eval_log_5 <- evaluasi_semua_variabel(
  data_asli = gt_met, data_uji = uji_5,
  data_imputasi = imputasi_log_5, nama_skema = "Skema 5 (40%)"
)
# Stack the five schemes into one master table
tabel_pervar_log <- bind_rows(
  eval_log_1, eval_log_2, eval_log_3, eval_log_4, eval_log_5
)
# Render the table for the log-transformed run
kable(
  tabel_pervar_log,
  caption = "<b>Tabel Evaluasi Per Variabel: Kinerja MissForest pada Data dengan Transformasi (Logaritma pada RR)</b>",
  align = "c",
  escape = FALSE,
  row.names = FALSE
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "center"
  ) %>%
  collapse_rows(columns = 1, valign = "top") %>%
  # blue header row
  row_spec(0, bold = TRUE, color = "white", background = "#2196F3")
| Skema | Variabel | SD_Aktual | RMSE | MAE | NRMSE | R_Squared |
|---|---|---|---|---|---|---|
| Skema 1 (5%) | Tn | 1.5779 | 0.9712 | 0.7368 | 0.6155 | 0.6232 |
| Tx | 1.5428 | 0.9742 | 0.6913 | 0.6314 | 0.6036 | |
| Tavg | 0.9581 | 0.5825 | 0.4280 | 0.6080 | 0.6309 | |
| RH_avg | 7.5312 | 3.9567 | 3.0824 | 0.5254 | 0.7237 | |
| RR | 11.4686 | 11.1929 | 5.3828 | 0.9760 | 0.1222 | |
| ss | 2.2035 | 1.6686 | 1.2650 | 0.7573 | 0.4400 | |
| ff_avg | 1.0859 | 0.9115 | 0.6915 | 0.8395 | 0.2982 | |
| Skema 2 (10%) | Tn | 1.5445 | 0.9826 | 0.7549 | 0.6362 | 0.5963 |
| Tx | 1.5484 | 0.9263 | 0.6996 | 0.5982 | 0.6459 | |
| Tavg | 0.8566 | 0.5454 | 0.4182 | 0.6367 | 0.5946 | |
| RH_avg | 7.6250 | 4.1481 | 3.1529 | 0.5440 | 0.7054 | |
| RR | 11.7941 | 11.2402 | 5.3071 | 0.9530 | 0.1691 | |
| ss | 2.3298 | 1.6818 | 1.3035 | 0.7219 | 0.4792 | |
| ff_avg | 1.1282 | 0.9602 | 0.7339 | 0.8510 | 0.2753 | |
| Skema 3 (15%) | Tn | 1.5124 | 0.9959 | 0.7693 | 0.6585 | 0.5667 |
| Tx | 1.4739 | 0.9451 | 0.7206 | 0.6412 | 0.5898 | |
| Tavg | 0.9250 | 0.5996 | 0.4548 | 0.6482 | 0.5807 | |
| RH_avg | 7.4837 | 4.3670 | 3.3897 | 0.5835 | 0.6594 | |
| RR | 12.4520 | 11.8780 | 5.6053 | 0.9539 | 0.1820 | |
| ss | 2.2611 | 1.6378 | 1.2808 | 0.7243 | 0.4756 | |
| ff_avg | 1.1101 | 0.9534 | 0.7298 | 0.8588 | 0.2629 | |
| Skema 4 (30%) | Tn | 1.5309 | 1.1333 | 0.8534 | 0.7403 | 0.4666 |
| Tx | 1.4971 | 1.0469 | 0.7761 | 0.6993 | 0.5160 | |
| Tavg | 0.8864 | 0.6643 | 0.5041 | 0.7495 | 0.4602 | |
| RH_avg | 7.5323 | 4.9800 | 3.8611 | 0.6612 | 0.5752 | |
| RR | 12.3741 | 11.9421 | 5.8583 | 0.9651 | 0.1314 | |
| ss | 2.3069 | 1.7476 | 1.3478 | 0.7575 | 0.4325 | |
| ff_avg | 1.1520 | 1.0375 | 0.7837 | 0.9006 | 0.1963 | |
| Skema 5 (40%) | Tn | 1.5266 | 1.2681 | 0.9504 | 0.8307 | 0.3572 |
| Tx | 1.4853 | 1.1379 | 0.8516 | 0.7661 | 0.4333 | |
| Tavg | 0.9220 | 0.7355 | 0.5595 | 0.7977 | 0.3990 | |
| RH_avg | 7.7043 | 5.4855 | 4.1939 | 0.7120 | 0.5115 | |
| RR | 12.1162 | 11.8622 | 6.2655 | 0.9790 | 0.0966 | |
| ss | 2.2859 | 1.8906 | 1.4627 | 0.8271 | 0.3461 | |
| ff_avg | 1.1357 | 1.0842 | 0.8254 | 0.9547 | 0.1326 |