Input Data

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# === DATA BANDUNG ===
bandung = read_excel("D:\\Semester 6\\DC\\2\\Data  Suhu dan CH Kota Bandung.xlsx",
                      sheet = "Sheet1", 
                      guess_max = 50000)

# === DATA STASIUN PEMBANDING ===
citeko = read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx",
                     sheet = "Stasiun Citeko", 
                     skip = 8, 
                     guess_max = 50000)

klimat = read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx",
                     sheet = "Stasiun Klimatologi jawa barat", 
                     skip = 8, 
                     guess_max = 50000)
names(citeko)
## [1] "Tanggal" "Tn"      "Tx"      "Tavg"    "RH_avg"  "RR"
names(klimat)
## [1] "Tanggal" "Tn"      "Tx"      "Tavg"    "RH_avg"  "RR"
names(bandung)
## [1] "Tanggal" "Tn"      "Tx"      "RR"

Persiapan Data

# Bersihkan kode missing
clean_missing <- function(x){
  x[x == -99.9] <- NA
  x[x == 8888] <- NA
  return(x)
}

bandung$RR <- clean_missing(bandung$RR)
bandung$Tn <- clean_missing(bandung$Tn)
bandung$Tx <- clean_missing(bandung$Tx)

citeko$RR <- clean_missing(citeko$RR)
citeko$Tn <- clean_missing(citeko$Tn)
citeko$Tx <- clean_missing(citeko$Tx)

klimat$RR <- clean_missing(klimat$RR)
# Panggil ulang datanya
citeko <- read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx", sheet = "Stasiun Citeko", skip = 8, guess_max = 50000)

klimat <- read_excel("D:\\data_deska\\Stasiun Staisun lain.xlsx", sheet = "Stasiun Klimatologi jawa barat", skip = 8, guess_max = 50000)

# Untuk data Bandung, fungsi as.Date biasa sepertinya sudah berhasil tanpa error
bandung$Tanggal <- as.Date(bandung$Tanggal)

# Untuk data Citeko dan Klimat, kita terjemahkan angka serial Excel-nya
citeko$Tanggal <- as.Date(as.numeric(citeko$Tanggal), origin = "1899-12-30")
## Warning in as.Date(as.numeric(citeko$Tanggal), origin = "1899-12-30"): NAs
## introduced by coercion
klimat$Tanggal <- as.Date(as.numeric(klimat$Tanggal), origin = "1899-12-30")
## Warning in as.Date(as.numeric(klimat$Tanggal), origin = "1899-12-30"): NAs
## introduced by coercion
# Cek hasilnya
head(citeko$Tanggal)
## [1] "1975-01-01" "1975-01-02" "1975-01-03" "1975-01-04" "1975-01-05"
## [6] "1975-01-06"
bandung$Tanggal <- as.Date(bandung$Tanggal)
citeko$Tanggal <- as.Date(citeko$Tanggal)
klimat$Tanggal <- as.Date(klimat$Tanggal)

data_suhu <- merge(bandung, citeko, by="Tanggal", suffixes=c("_bdg","_ctk"))
data_hujan <- merge(bandung, klimat, by="Tanggal", suffixes=c("_bdg","_klm"))

Imputasi Suhu (Regresi Linear)

# Ubah tipe data menjadi numerik
# Lakukan untuk suhu maksimum
data_suhu$Tx_bdg = as.numeric(data_suhu$Tx_bdg)
data_suhu$Tx_ctk = as.numeric(data_suhu$Tx_ctk)

# (Sekalian untuk suhu minimum agar nanti tidak error juga)
data_suhu$Tn_bdg = as.numeric(data_suhu$Tn_bdg)
data_suhu$Tn_ctk = as.numeric(data_suhu$Tn_ctk)

Tmin (minimum)

# Suhu minimum
model_tn <- lm(Tn_bdg ~ Tn_ctk, data=data_suhu)
missing_tn <- is.na(data_suhu$Tn_bdg)

data_suhu$Tn_bdg[missing_tn] <- predict(model_tn,
                                        newdata=data_suhu[missing_tn,])

Tmax (maximum)

# Suhu maximum 
model_tx <- lm(Tx_bdg ~ Tx_ctk, data=data_suhu)
missing_tx <- is.na(data_suhu$Tx_bdg)

data_suhu$Tx_bdg[missing_tx] <- predict(model_tx,
                                        newdata=data_suhu[missing_tx,])

Imputasi Curah Hujan (Normal Ratio)

# Ubah tipe data jadi numerik 
data_hujan$RR_bdg = as.numeric(data_hujan$RR_bdg)
data_hujan$RR_klm = as.numeric(data_hujan$RR_klm)

# Hitung rataannya 
mean_bdg <- mean(data_hujan$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan$RR_klm, na.rm=TRUE)

# Imputasi Normal Ratio
missing_rr <- is.na(data_hujan$RR_bdg)

data_hujan$RR_bdg[missing_rr] <-
  (mean_bdg / mean_klm) * data_hujan$RR_klm[missing_rr]

Validasi Hasil Imputasi

# Membuat rekap 
rekap_fungsi = function(x) {
  c(Min = min(x, na.rm = TRUE),
    Q1 = quantile(x, 0.25, na.rm = TRUE),
    Median = median(x, na.rm = TRUE),
    Mean = round(mean(x, na.rm = TRUE), 2),
    Q3 = quantile(x, 0.75, na.rm = TRUE),
    Max = max(x, na.rm = TRUE),
    Jumlah_NA = sum(is.na(x)))
}

# Menggabungkan ketiga variabel menjadi 1 tabel (matrix)  
tabel_rapi = rbind(
  Tn_Bandung = rekap_fungsi(data_suhu$Tn_bdg),
  Tx_Bandung = rekap_fungsi(data_suhu$Tx_bdg),
  RR_Bandung = rekap_fungsi(data_hujan$RR_bdg)
)

# Hasilnya
print(tabel_rapi)
##             Min Q1.25% Median  Mean Q3.75% Max Jumlah_NA
## Tn_Bandung 13.0   19.0   20.0 19.60   20.6  29        49
## Tx_Bandung 18.8   28.2   29.2 29.12   30.0  36        52
## RR_Bandung  0.0    0.0    0.5  6.83    7.0 160       254
data_hujan_all <- bandung %>%
  select(Tanggal, RR_bdg = RR) %>%
  left_join(klimat %>% select(Tanggal, RR_klm = RR), by="Tanggal") %>%
  left_join(citeko %>% select(Tanggal, RR_ctk = RR), by="Tanggal")
# Ubah tipe data jadi numerik
data_hujan_all$RR_bdg <- as.numeric(data_hujan_all$RR_bdg)
data_hujan_all$RR_klm <- as.numeric(data_hujan_all$RR_klm)
data_hujan_all$RR_ctk <- as.numeric(data_hujan_all$RR_ctk) # Citeko 

mean_bdg <- mean(data_hujan_all$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan_all$RR_klm, na.rm=TRUE)
# Hitung rataan data Bandung
mean_bdg <- mean(data_hujan_all$RR_bdg, na.rm=TRUE)
mean_klm <- mean(data_hujan_all$RR_klm, na.rm=TRUE)

# Imputasi Normal Ratio 
missing_rr <- is.na(data_hujan_all$RR_bdg) & !is.na(data_hujan_all$RR_klm)

data_hujan_all$RR_bdg[missing_rr] <-
  (mean_bdg / mean_klm) * data_hujan_all$RR_klm[missing_rr]
# Hitung rataan Citeko 
mean_ctk <- mean(data_hujan_all$RR_ctk, na.rm=TRUE)

# Imputasi Normal Ratio
missing_rr2 <- is.na(data_hujan_all$RR_bdg) & !is.na(data_hujan_all$RR_ctk)

data_hujan_all$RR_bdg[missing_rr2] <-
  (mean_bdg / mean_ctk) * data_hujan_all$RR_ctk[missing_rr2]
# Cek missing value data hujan
missing_rr3 <- is.na(data_hujan_all$RR_bdg) &
               data_hujan_all$RR_klm == 0 &
               data_hujan_all$RR_ctk == 0

data_hujan_all$RR_bdg[missing_rr3] <- 0
# Mengisi data dengan maksimal gap 7 hari
data_hujan_all$RR_bdg <- na.approx(
  data_hujan_all$RR_bdg,
  na.rm = FALSE,
  maxgap = 7   # hanya isi gap ≤ 7 hari
)
cat("Missing RR setelah imputasi:",
    sum(is.na(data_hujan_all$RR_bdg)), "\n")
## Missing RR setelah imputasi: 13

Cek Ulang

# buat kolom bulan-hari
data_hujan_all <- data_hujan_all %>%
  mutate(
    bulan = month(Tanggal),
    hari = day(Tanggal)
  )

# hitung rata-rata hujan tiap tanggal kalender
klimat_harian <- data_hujan_all %>%
  group_by(bulan, hari) %>%
  summarise(mean_rr = mean(RR_bdg, na.rm = TRUE), .groups="drop")

# gabungkan kembali
data_hujan_all <- left_join(
  data_hujan_all,
  klimat_harian,
  by=c("bulan","hari")
)

# isi NA dengan klimatologi
missing_rr_final <- is.na(data_hujan_all$RR_bdg)

data_hujan_all$RR_bdg[missing_rr_final] <-
  data_hujan_all$mean_rr[missing_rr_final]

# Hasil 
cat("Missing RR final:",
    sum(is.na(data_hujan_all$RR_bdg)), "\n")
## Missing RR final: 0
summary(data_hujan_all$RR_bdg)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.0000   0.0000   0.4106   7.0733   6.8000 198.3290

Cek Korelasi

library(dplyr)
library(lubridate)

# =========================
# 1️⃣ FILTER PERIODE 1990–2022
# =========================

bandung_90 <- bandung %>%
  filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)

klimat_90 <- klimat %>%
  filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)

citeko_90 <- citeko %>%
  filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022)
# =========================
# 2️⃣ GABUNG DATA (HANYA YANG LENGKAP)
# =========================

# Bandung vs Staklim Jabar
corr_klimat <- bandung_90 %>%
  select(Tanggal, RR_bdg = RR) %>%
  inner_join(
    klimat_90 %>% select(Tanggal, RR_klm = RR),
    by = "Tanggal"
  ) %>%
  filter(!is.na(RR_bdg) & !is.na(RR_klm))

# Bandung vs Citeko
corr_citeko <- bandung_90 %>%
  select(Tanggal, RR_bdg = RR) %>%
  inner_join(
    citeko_90 %>% select(Tanggal, RR_ctk = RR),
    by = "Tanggal"
  ) %>%
  filter(!is.na(RR_bdg) & !is.na(RR_ctk))
# Ubah tipe data jadi numerik
corr_klimat$RR_bdg <- as.numeric(corr_klimat$RR_bdg)
corr_klimat$RR_klm <- as.numeric(corr_klimat$RR_klm)

corr_citeko$RR_bdg <- as.numeric(corr_citeko$RR_bdg)
corr_citeko$RR_ctk <- as.numeric(corr_citeko$RR_ctk)
# 1. Pastikan fungsi sapu pembersih sudah aktif
clean_missing <- function(x){
  x[x == -99.9] <- NA
  x[x == 8888] <- NA
  return(x)
}

# 2. Siapkan data Bandung vs Staklim Jabar
corr_klimat <- bandung %>%
  filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>%
  select(Tanggal, RR_bdg = RR) %>%
  inner_join(
    klimat %>% filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>% select(Tanggal, RR_klm = RR),
    by = "Tanggal"
  ) %>%
  # Ubah ke angka dulu
  mutate(RR_bdg = as.numeric(RR_bdg), RR_klm = as.numeric(RR_klm)) %>%
  # BERSIHKAN ANGKA 8888 SEKARANG
  mutate(RR_bdg = clean_missing(RR_bdg), RR_klm = clean_missing(RR_klm)) %>%
  # Baru buang NA-nya
  filter(!is.na(RR_bdg) & !is.na(RR_klm))

# 3. Siapkan data Bandung vs Citeko
corr_citeko <- bandung %>%
  filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>%
  select(Tanggal, RR_bdg = RR) %>%
  inner_join(
    citeko %>% filter(year(Tanggal) >= 1990 & year(Tanggal) <= 2022) %>% select(Tanggal, RR_ctk = RR),
    by = "Tanggal"
  ) %>%
  # Ubah ke angka dulu
  mutate(RR_bdg = as.numeric(RR_bdg), RR_ctk = as.numeric(RR_ctk)) %>%
  # BERSIHKAN ANGKA 8888 SEKARANG
  mutate(RR_bdg = clean_missing(RR_bdg), RR_ctk = clean_missing(RR_ctk)) %>%
  # Baru buang NA-nya
  filter(!is.na(RR_bdg) & !is.na(RR_ctk))

# 4. Hitung Korelasi
cor_klimat <- cor(corr_klimat$RR_bdg, corr_klimat$RR_klm, method = "pearson")
cor_citeko <- cor(corr_citeko$RR_bdg, corr_citeko$RR_ctk, method = "pearson")

# 5. Cetak Hasil
cat("====================================\n")
## ====================================
cat("Periode: 1990–2022\n")
## Periode: 1990–2022
cat("====================================\n")
## ====================================
cat("Korelasi Bandung - Staklim Jabar:", round(cor_klimat, 3), "\n")
## Korelasi Bandung - Staklim Jabar: 0.174
cat("Jumlah pasangan data:", nrow(corr_klimat), "\n\n")
## Jumlah pasangan data: 6853
cat("Korelasi Bandung - Citeko:", round(cor_citeko, 3), "\n")
## Korelasi Bandung - Citeko: 0.258
cat("Jumlah pasangan data:", nrow(corr_citeko), "\n")
## Jumlah pasangan data: 7829

Agregasi

library(dplyr)
library(lubridate)

# =========================================================
# AGREGASI DATA HARIAN MENJADI BULANAN
# =========================================================

# 1. Agregasi Bandung vs Klimat
bulanan_klimat <- corr_klimat %>%
  # Membuat kolom penanda Bulan dan Tahun (Contoh: "Jan 1990")
  mutate(Bulan_Tahun = floor_date(Tanggal, "month")) %>%
  group_by(Bulan_Tahun) %>%
  # Menjumlahkan hujan harian menjadi total bulanan
  summarise(
    Total_RR_bdg = sum(RR_bdg, na.rm = TRUE),
    Total_RR_klm = sum(RR_klm, na.rm = TRUE)
  )

# 2. Agregasi Bandung vs Citeko
bulanan_citeko <- corr_citeko %>%
  mutate(Bulan_Tahun = floor_date(Tanggal, "month")) %>%
  group_by(Bulan_Tahun) %>%
  summarise(
    Total_RR_bdg = sum(RR_bdg, na.rm = TRUE),
    Total_RR_ctk = sum(RR_ctk, na.rm = TRUE)
  )

# =========================================================
# HITUNG KORELASI BULANAN
# =========================================================

cor_bulanan_klimat <- cor(bulanan_klimat$Total_RR_bdg, bulanan_klimat$Total_RR_klm, method = "pearson")
cor_bulanan_citeko <- cor(bulanan_citeko$Total_RR_bdg, bulanan_citeko$Total_RR_ctk, method = "pearson")

# Cetak Hasilnya
cat("====================================\n")
## ====================================
cat("KORELASI CURAH HUJAN BULANAN\n")
## KORELASI CURAH HUJAN BULANAN
cat("====================================\n")
## ====================================
cat("Korelasi Bulanan Bandung - Staklim Jabar:", round(cor_bulanan_klimat, 3), "\n")
## Korelasi Bulanan Bandung - Staklim Jabar: 0.596
cat("Korelasi Bulanan Bandung - Citeko:", round(cor_bulanan_citeko, 3), "\n")
## Korelasi Bulanan Bandung - Citeko: 0.636