Input Data
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# === DATA BANDUNG ===
bandung = read_excel("D:\\Semester 6\\DC\\2\\Data Suhu dan CH Kota Bandung.xlsx",
sheet = "Sheet1",
guess_max = 50000)
# === DATA STASIUN PEMBANDING ===
citeko = read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx",
sheet = "Stasiun Citeko",
skip = 8,
guess_max = 50000)
klimat = read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx",
sheet = "Stasiun Klimatologi jawa barat",
skip = 8,
guess_max = 50000)
names(citeko)
## [1] "Tanggal" "Tn" "Tx" "Tavg" "RH_avg" "RR"
names(klimat)
## [1] "Tanggal" "Tn" "Tx" "Tavg" "RH_avg" "RR"
names(bandung)
## [1] "Tanggal" "Tn" "Tx" "RR"
Persiapan Data
# Bersihkan kode missing
clean_missing <- function(x){
x[x == -99.9] <- NA
x[x == 8888] <- NA
return(x)
}
bandung$RR <- clean_missing(bandung$RR)
bandung$Tn <- clean_missing(bandung$Tn)
bandung$Tx <- clean_missing(bandung$Tx)
citeko$RR <- clean_missing(citeko$RR)
citeko$Tn <- clean_missing(citeko$Tn)
citeko$Tx <- clean_missing(citeko$Tx)
klimat$RR <- clean_missing(klimat$RR)
# Panggil ulang datanya
citeko <- read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx", sheet = "Stasiun Citeko", skip = 8, guess_max = 50000)
klimat <- read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx", sheet = "Stasiun Klimatologi jawa barat", skip = 8, guess_max = 50000)
# Untuk data Bandung, fungsi as.Date biasa sepertinya sudah berhasil tanpa error
bandung$Tanggal <- as.Date(bandung$Tanggal)
# Untuk data Citeko dan Klimat, kita terjemahkan angka serial Excel-nya
citeko$Tanggal <- as.Date(as.numeric(citeko$Tanggal), origin = "1899-12-30")
## Warning in as.Date(as.numeric(citeko$Tanggal), origin = "1899-12-30"): NAs
## introduced by coercion
klimat$Tanggal <- as.Date(as.numeric(klimat$Tanggal), origin = "1899-12-30")
## Warning in as.Date(as.numeric(klimat$Tanggal), origin = "1899-12-30"): NAs
## introduced by coercion
# Cek hasilnya
head(citeko$Tanggal)
## [1] "1975-01-01" "1975-01-02" "1975-01-03" "1975-01-04" "1975-01-05"
## [6] "1975-01-06"
bandung$Tanggal <- as.Date(bandung$Tanggal)
citeko$Tanggal <- as.Date(citeko$Tanggal)
klimat$Tanggal <- as.Date(klimat$Tanggal)
data_suhu <- merge(bandung, citeko, by="Tanggal", suffixes=c("_bdg","_ctk"))
data_hujan <- merge(bandung, klimat, by="Tanggal", suffixes=c("_bdg","_klm"))
Imputasi Suhu (Regresi Linear)
# Ubah tipe data menjadi numerik
# Lakukan untuk suhu maksimum
data_suhu$Tx_bdg = as.numeric(data_suhu$Tx_bdg)
data_suhu$Tx_ctk = as.numeric(data_suhu$Tx_ctk)
# (Sekalian untuk suhu minimum agar nanti tidak error juga)
data_suhu$Tn_bdg = as.numeric(data_suhu$Tn_bdg)
data_suhu$Tn_ctk = as.numeric(data_suhu$Tn_ctk)
Tmin (minimum)
# Suhu minimum
model_tn <- lm(Tn_bdg ~ Tn_ctk, data=data_suhu)
missing_tn <- is.na(data_suhu$Tn_bdg)
data_suhu$Tn_bdg[missing_tn] <- predict(model_tn,
newdata=data_suhu[missing_tn,])
Tmax (maximum)
# Suhu maximum
model_tx <- lm(Tx_bdg ~ Tx_ctk, data=data_suhu)
missing_tx <- is.na(data_suhu$Tx_bdg)
data_suhu$Tx_bdg[missing_tx] <- predict(model_tx,
newdata=data_suhu[missing_tx,])
Imputasi Curah Hujan (Normal Ratio)
# Ubah tipe data jadi numerik
data_hujan$RR_bdg = as.numeric(data_hujan$RR_bdg)
data_hujan$RR_klm = as.numeric(data_hujan$RR_klm)
# Hitung rataannya
mean_bdg <- mean(data_hujan$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan$RR_klm, na.rm=TRUE)
# Imputasi Normal Ratio
missing_rr <- is.na(data_hujan$RR_bdg)
data_hujan$RR_bdg[missing_rr] <-
(mean_bdg / mean_klm) * data_hujan$RR_klm[missing_rr]
Validasi Hasil Imputasi
# Membuat rekap
rekap_fungsi = function(x) {
c(Min = min(x, na.rm = TRUE),
Q1 = quantile(x, 0.25, na.rm = TRUE),
Median = median(x, na.rm = TRUE),
Mean = round(mean(x, na.rm = TRUE), 2),
Q3 = quantile(x, 0.75, na.rm = TRUE),
Max = max(x, na.rm = TRUE),
Jumlah_NA = sum(is.na(x)))
}
# Menggabungkan ketiga variabel menjadi 1 tabel (matrix)
tabel_rapi = rbind(
Tn_Bandung = rekap_fungsi(data_suhu$Tn_bdg),
Tx_Bandung = rekap_fungsi(data_suhu$Tx_bdg),
RR_Bandung = rekap_fungsi(data_hujan$RR_bdg)
)
# Hasilnya
print(tabel_rapi)
## Min Q1.25% Median Mean Q3.75% Max Jumlah_NA
## Tn_Bandung 13.0 19.0 20.0 19.60 20.6 29 49
## Tx_Bandung 18.8 28.2 29.2 29.12 30.0 36 52
## RR_Bandung 0.0 0.0 0.5 6.83 7.0 160 254
data_hujan_all <- bandung %>%
select(Tanggal, RR_bdg = RR) %>%
left_join(klimat %>% select(Tanggal, RR_klm = RR), by="Tanggal") %>%
left_join(citeko %>% select(Tanggal, RR_ctk = RR), by="Tanggal")
# Ubah tipe data jadi numerik
data_hujan_all$RR_bdg <- as.numeric(data_hujan_all$RR_bdg)
data_hujan_all$RR_klm <- as.numeric(data_hujan_all$RR_klm)
data_hujan_all$RR_ctk <- as.numeric(data_hujan_all$RR_ctk) # Citeko
mean_bdg <- mean(data_hujan_all$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan_all$RR_klm, na.rm=TRUE)
# Hitung rataan data Bandung
mean_bdg <- mean(data_hujan_all$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan_all$RR_klm, na.rm=TRUE)
# Imputasi Normal Ratio
missing_rr <- is.na(data_hujan_all$RR_bdg) & !is.na(data_hujan_all$RR_klm)
data_hujan_all$RR_bdg[missing_rr] <-
(mean_bdg / mean_klm) * data_hujan_all$RR_klm[missing_rr]
# Hitung rataan Citeko
mean_ctk <- mean(data_hujan_all$RR_ctk, na.rm=TRUE)
# Imputasi Normal Ratio
missing_rr2 <- is.na(data_hujan_all$RR_bdg) & !is.na(data_hujan_all$RR_ctk)
data_hujan_all$RR_bdg[missing_rr2] <-
(mean_bdg / mean_ctk) * data_hujan_all$RR_ctk[missing_rr2]
# Cek missing value data hujan
missing_rr3 <- is.na(data_hujan_all$RR_bdg) &
data_hujan_all$RR_klm == 0 &
data_hujan_all$RR_ctk == 0
data_hujan_all$RR_bdg[missing_rr3] <- 0
# Mengisi data dengan maksimal gap 7 hari
data_hujan_all$RR_bdg <- na.approx(
data_hujan_all$RR_bdg,
na.rm = FALSE,
maxgap = 7 # hanya isi gap ≤ 7 hari
)
cat("Missing RR setelah imputasi:",
sum(is.na(data_hujan_all$RR_bdg)), "\n")
## Missing RR setelah imputasi: 13
Cek Ulang
# buat kolom bulan-hari
data_hujan_all <- data_hujan_all %>%
mutate(
bulan = month(Tanggal),
hari = day(Tanggal)
)
# hitung rata-rata hujan tiap tanggal kalender
klimat_harian <- data_hujan_all %>%
group_by(bulan, hari) %>%
summarise(mean_rr = mean(RR_bdg, na.rm = TRUE), .groups="drop")
# gabungkan kembali
data_hujan_all <- left_join(
data_hujan_all,
klimat_harian,
by=c("bulan","hari")
)
# isi NA dengan klimatologi
missing_rr_final <- is.na(data_hujan_all$RR_bdg)
data_hujan_all$RR_bdg[missing_rr_final] <-
data_hujan_all$mean_rr[missing_rr_final]
# Hasil
cat("Missing RR final:",
sum(is.na(data_hujan_all$RR_bdg)), "\n")
## Missing RR final: 0
summary(data_hujan_all$RR_bdg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.4106 7.0733 6.8000 198.3290
Cek Korelasi
library(dplyr)
library(lubridate)
# =========================
# 1️⃣ FILTER PERIODE 1990–2022
# =========================
bandung_90 <- bandung %>%
filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)
klimat_90 <- klimat %>%
filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)
citeko_90 <- citeko %>%
filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)
# =========================
# 2️⃣ GABUNG DATA (HANYA YANG LENGKAP)
# =========================
# Bandung vs Staklim Jabar
corr_klimat <- bandung_90 %>%
select(Tanggal, RR_bdg = RR) %>%
inner_join(
klimat_90 %>% select(Tanggal, RR_klm = RR),
by = "Tanggal"
) %>%
filter(!is.na(RR_bdg) & !is.na(RR_klm))
# Bandung vs Citeko
corr_citeko <- bandung_90 %>%
select(Tanggal, RR_bdg = RR) %>%
inner_join(
citeko_90 %>% select(Tanggal, RR_ctk = RR),
by = "Tanggal"
) %>%
filter(!is.na(RR_bdg) & !is.na(RR_ctk))
# Ubah tipe data jadi numerik
corr_klimat$RR_bdg <- as.numeric(corr_klimat$RR_bdg)
corr_klimat$RR_klm <- as.numeric(corr_klimat$RR_klm)
corr_citeko$RR_bdg <- as.numeric(corr_citeko$RR_bdg)
corr_citeko$RR_ctk <- as.numeric(corr_citeko$RR_ctk)
# 1. Pastikan fungsi sapu pembersih sudah aktif
clean_missing <- function(x){
x[x == -99.9] <- NA
x[x == 8888] <- NA
return(x)
}
# 2. Siapkan data Bandung vs Staklim Jabar
corr_klimat <- bandung %>%
filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>%
select(Tanggal, RR_bdg = RR) %>%
inner_join(
klimat %>% filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>% select(Tanggal, RR_klm = RR),
by = "Tanggal"
) %>%
# Ubah ke angka dulu
mutate(RR_bdg = as.numeric(RR_bdg), RR_klm = as.numeric(RR_klm)) %>%
# BERSIHKAN ANGKA 8888 SEKARANG
mutate(RR_bdg = clean_missing(RR_bdg), RR_klm = clean_missing(RR_klm)) %>%
# Baru buang NA-nya
filter(!is.na(RR_bdg) & !is.na(RR_klm))
# 3. Siapkan data Bandung vs Citeko
corr_citeko <- bandung %>%
filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>%
select(Tanggal, RR_bdg = RR) %>%
inner_join(
citeko %>% filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>% select(Tanggal, RR_ctk = RR),
by = "Tanggal"
) %>%
# Ubah ke angka dulu
mutate(RR_bdg = as.numeric(RR_bdg), RR_ctk = as.numeric(RR_ctk)) %>%
# BERSIHKAN ANGKA 8888 SEKARANG
mutate(RR_bdg = clean_missing(RR_bdg), RR_ctk = clean_missing(RR_ctk)) %>%
# Baru buang NA-nya
filter(!is.na(RR_bdg) & !is.na(RR_ctk))
# 4. Hitung Korelasi
cor_klimat <- cor(corr_klimat$RR_bdg, corr_klimat$RR_klm, method = "pearson")
cor_citeko <- cor(corr_citeko$RR_bdg, corr_citeko$RR_ctk, method = "pearson")
# 5. Cetak Hasil
cat("====================================\n")
## ====================================
cat("Periode: 1990–2022\n")
## Periode: 1990–2022
cat("====================================\n")
## ====================================
cat("Korelasi Bandung - Staklim Jabar:", round(cor_klimat, 3), "\n")
## Korelasi Bandung - Staklim Jabar: 0.174
cat("Jumlah pasangan data:", nrow(corr_klimat), "\n\n")
## Jumlah pasangan data: 6853
cat("Korelasi Bandung - Citeko:", round(cor_citeko, 3), "\n")
## Korelasi Bandung - Citeko: 0.258
cat("Jumlah pasangan data:", nrow(corr_citeko), "\n")
## Jumlah pasangan data: 7829
Agregasi
library(dplyr)
library(lubridate)
# =========================================================
# AGREGASI DATA HARIAN MENJADI BULANAN
# =========================================================
# 1. Agregasi Bandung vs Klimat
bulanan_klimat <- corr_klimat %>%
# Membuat kolom penanda Bulan dan Tahun (Contoh: "Jan 1990")
mutate(Bulan_Tahun = floor_date(Tanggal, "month")) %>%
group_by(Bulan_Tahun) %>%
# Menjumlahkan hujan harian menjadi total bulanan
summarise(
Total_RR_bdg = sum(RR_bdg, na.rm = TRUE),
Total_RR_klm = sum(RR_klm, na.rm = TRUE)
)
# 2. Agregasi Bandung vs Citeko
bulanan_citeko <- corr_citeko %>%
mutate(Bulan_Tahun = floor_date(Tanggal, "month")) %>%
group_by(Bulan_Tahun) %>%
summarise(
Total_RR_bdg = sum(RR_bdg, na.rm = TRUE),
Total_RR_ctk = sum(RR_ctk, na.rm = TRUE)
)
# =========================================================
# HITUNG KORELASI BULANAN
# =========================================================
cor_bulanan_klimat <- cor(bulanan_klimat$Total_RR_bdg, bulanan_klimat$Total_RR_klm, method = "pearson")
cor_bulanan_citeko <- cor(bulanan_citeko$Total_RR_bdg, bulanan_citeko$Total_RR_ctk, method = "pearson")
# Cetak Hasilnya
cat("====================================\n")
## ====================================
cat("KORELASI CURAH HUJAN BULANAN\n")
## KORELASI CURAH HUJAN BULANAN
cat("====================================\n")
## ====================================
cat("Korelasi Bulanan Bandung - Staklim Jabar:", round(cor_bulanan_klimat, 3), "\n")
## Korelasi Bulanan Bandung - Staklim Jabar: 0.596
cat("Korelasi Bulanan Bandung - Citeko:", round(cor_bulanan_citeko, 3), "\n")
## Korelasi Bulanan Bandung - Citeko: 0.636