library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readxl)
# Citeko ----
# Read the "Stasiun Citeko" sheet, keep only the date and RR columns, drop
# rows without a date, and make sure RR is numeric.
# NOTE(review): skip = 8 presumably jumps over a metadata header in the
# sheet -- confirm against the workbook.
citeko <- read_xlsx(
  path = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Staisun lain.xlsx",
  sheet = "Stasiun Citeko",
  skip = 8,
  # Let readxl look at up to 50000 rows when guessing column types.
  guess_max = 50000
) %>%
  select(Tanggal, RR_C = RR) %>%
  filter(!is.na(Tanggal)) %>%
  # Coerce after filtering so only the retained rows are converted.
  mutate(RR_C = as.numeric(RR_C))
# Preview the first rows of the result.
head(citeko)
## # A tibble: 6 × 2
## Tanggal RR_C
## <chr> <dbl>
## 1 27395 NA
## 2 27396 NA
## 3 27397 NA
## 4 27398 NA
## 5 27399 NA
## 6 27400 NA
# Inspect the structure of the Citeko table. The output below reports Tanggal
# as character (values such as "27395"), not as a date type.
str(citeko)
## tibble [17,545 × 2] (S3: tbl_df/tbl/data.frame)
## $ Tanggal: chr [1:17545] "27395" "27396" "27397" "27398" ...
## $ RR_C : num [1:17545] NA NA NA NA NA NA NA NA NA NA ...
# Jabar ----
# Read the "Stasiun Klimatologi jawa barat" sheet, keep only the date and RR
# columns, drop rows without a date, and make sure RR is numeric.
# NOTE(review): skip = 8 presumably jumps over a metadata header in the
# sheet -- confirm against the workbook.
jabar <- read_xlsx(
  path = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Staisun lain.xlsx",
  sheet = "Stasiun Klimatologi jawa barat",
  skip = 8,
  # Let readxl look at up to 50000 rows when guessing column types.
  guess_max = 50000
) %>%
  select(Tanggal, RR_J = RR) %>%
  filter(!is.na(Tanggal)) %>%
  # Coerce after filtering so only the retained rows are converted.
  mutate(RR_J = as.numeric(RR_J))
# Preview the first rows of the result.
head(jabar)
## # A tibble: 6 × 2
## Tanggal RR_J
## <chr> <dbl>
## 1 27395 NA
## 2 27396 NA
## 3 27397 NA
## 4 27398 NA
## 5 27399 NA
## 6 27400 NA
# Inspect the structure of the Jabar table. The output below reports Tanggal
# as character (values such as "27395"), not as a date type.
str(jabar)
## tibble [17,545 × 2] (S3: tbl_df/tbl/data.frame)
## $ Tanggal: chr [1:17545] "27395" "27396" "27397" "27398" ...
## $ RR_J : num [1:17545] NA NA NA NA NA NA NA NA NA NA ...
# Bandung ----
# Read the "1975-2022" sheet of the Bandung workbook, keep only the date and
# RR columns, drop rows without a date, and make sure RR is numeric.
# The preview further down shows -99.9 entries in RR_B; they are not removed
# at this stage.
bandung <- read_xlsx(
  path = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Data Suhu dan CH Kota Bandung (1).xlsx",
  sheet = "1975-2022",
  # Let readxl look at up to 50000 rows when guessing column types.
  guess_max = 50000
) %>%
  select(Tanggal, RR_B = RR) %>%
  filter(!is.na(Tanggal)) %>%
  # Coerce after filtering so only the retained rows are converted.
  mutate(RR_B = as.numeric(RR_B))
## New names:
## • `Tahun` -> `Tahun...5`
## • `` -> `...6`
## • `Tahun` -> `Tahun...7`
# Check the result: preview the first rows of the Bandung table.
head(bandung)
## # A tibble: 6 × 2
## Tanggal RR_B
## <dttm> <dbl>
## 1 1980-01-01 00:00:00 1
## 2 1980-01-02 00:00:00 -99.9
## 3 1980-01-03 00:00:00 -99.9
## 4 1980-01-04 00:00:00 1
## 5 1980-01-05 00:00:00 -99.9
## 6 1980-01-06 00:00:00 1
# Kertajati ----
library(dplyr)
library(readr)
library(lubridate)
# Read the Kertajati daily CSV export, parse the day-month-year date strings,
# drop the rows whose date cannot be parsed, and keep only date and RR.
# NOTE(review): skip = 8 presumably jumps over a metadata header in the
# file -- confirm against the CSV.
kertajati <- read_csv(
  file = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Meteorologi Kertajati_01-01-1975_31-12-2022.xlsx - Data Harian - Table.csv",
  skip = 8,
  # Let readr look at up to 50000 rows when guessing column types.
  guess_max = 50000
) %>%
  mutate(Tanggal = dmy(Tanggal)) %>%
  # dmy() yields NA for the 13 malformed rows reported in the warning below;
  # remove them here.
  filter(!is.na(Tanggal)) %>%
  select(Tanggal, RR_KJT = RR)
## Rows: 17548 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Tanggal, ddd_car
## dbl (9): Tn, Tx, Tavg, RH_avg, RR, ss, ff_x, ddd_x, ff_avg
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Tanggal = dmy(Tanggal)`.
## Caused by warning:
## ! 13 failed to parse.
# Check the result: inspect the structure of the Kertajati table
# (Tanggal is reported as Date in the output below).
str(kertajati)
## tibble [17,532 × 2] (S3: tbl_df/tbl/data.frame)
## $ Tanggal: Date[1:17532], format: "1975-01-01" "1975-01-02" ...
## $ RR_KJT : num [1:17532] NA NA NA NA NA NA NA NA NA NA ...
# Preview the first rows of the Kertajati table.
head(kertajati)
## # A tibble: 6 × 2
## Tanggal RR_KJT
## <date> <dbl>
## 1 1975-01-01 NA
## 2 1975-01-02 NA
## 3 1975-01-03 NA
## 4 1975-01-04 NA
## 5 1975-01-05 NA
## 6 1975-01-06 NA
# CHIRPS ----
# Read the ClimateEngine CSV export with readr's default column guessing
# (the message below reports a character Tanggal and a double RR column).
chirps <- read_csv(
  file = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\ClimateEngine (1).csv"
)
## Rows: 12053 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Tanggal
## dbl (1): RR
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Parse Tanggal as month/day/year and keep only the date and RR columns,
# renaming RR to RR_S.
# NOTE(review): chirps_bersih is not referenced again in the code shown here;
# the join further down uses chirps_ok -- confirm which object is intended.
chirps_bersih <- chirps %>%
  # mdy() is used because the raw strings look like "1/1/1990", "1/2/1990", ...
  mutate(Tanggal = mdy(Tanggal)) %>%
  select(Tanggal, RR_S = RR)
# Look at the first parsed dates to confirm the year was read correctly
# (the output below shows the series starting in 1990).
head(chirps_bersih[["Tanggal"]])
## [1] "1990-01-01" "1990-01-02" "1990-01-03" "1990-01-04" "1990-01-05"
## [6] "1990-01-06"
# Preview the raw CHIRPS table (Tanggal is still an unparsed character column).
head(chirps)
## # A tibble: 6 × 2
## Tanggal RR
## <chr> <dbl>
## 1 1/1/1990 13.2
## 2 1/2/1990 3.86
## 3 1/3/1990 4.41
## 4 1/4/1990 11.6
## 5 1/5/1990 3.86
## 6 1/6/1990 9.17
# Gabungkan data ----
# MERGE STEP
# Inner-join the station tables onto the CHIRPS table one after another,
# matching on Tanggal, so only dates present in every table are kept.
# NOTE(review): the *_ok inputs are not created in the code shown here --
# confirm they are the date-normalised versions of the tables read above.
data_master_final <- Reduce(
  function(acc, tbl) inner_join(acc, tbl, by = "Tanggal"),
  list(bandung_ok, citeko_ok, jabar_ok, kertajati_ok),
  chirps_ok
)
# CLEANING STEP: turn sentinel codes (8888, 9999, -99, -99.9, 999) and
# out-of-range values in every RR column into NA, then keep complete rows.
data_siap <- data_master_final %>%
  mutate(across(starts_with("RR"), function(col) {
    # Make sure the column is numeric before comparing.
    v <- as.numeric(col)
    # A value is rejected when it is a listed sentinel code, above 500, or
    # negative.
    flagged <- v %in% c(8888, 9999, -99, -99.9, 999) | v > 500 | v < 0
    ifelse(flagged, NA, v)
  })) %>%
  # Drop every row that has an NA in any RR column so the regression can run.
  filter(if_all(starts_with("RR"), function(col) !is.na(col)))
# Report how many days survive the merge and the cleaning.
cat("Jumlah data yang berhasil disatukan:", nrow(data_siap), "hari")
## Jumlah data yang berhasil disatukan: 8792 hari
# Regresi ----
# Fit the linear regression of RR_B on RR_S, RR_C, RR_J and RR_KJT.
# The model must be fitted on data_siap, the table in which sentinel codes
# and out-of-range values were replaced by NA and incomplete rows removed.
# Fitting on data_master_final (the raw join) lets those codes enter the fit.
# NOTE: the summary printed below this chunk was produced from
# data_master_final (see its Call line) and must be regenerated after
# re-running this code.
model_final <- lm(RR_B ~ RR_S + RR_C + RR_J + RR_KJT, data = data_siap)
# Show coefficients, residual summary and fit statistics.
summary(model_final)
##
## Call:
## lm(formula = RR_B ~ RR_S + RR_C + RR_J + RR_KJT, data = data_master_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114.951 0.696 2.023 7.241 164.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.0173696 0.3677432 -5.486 4.21e-08 ***
## RR_S 0.3532582 0.0341050 10.358 < 2e-16 ***
## RR_C -0.0004257 0.0002268 -1.877 0.0606 .
## RR_J -0.0003132 0.0001239 -2.527 0.0115 *
## RR_KJT -0.0002747 0.0002102 -1.307 0.1913
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.23 on 10543 degrees of freedom
## (1505 observations deleted due to missingness)
## Multiple R-squared: 0.01109, Adjusted R-squared: 0.01072
## F-statistic: 29.56 on 4 and 10543 DF, p-value: < 2.2e-16