library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)   
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readxl)

Citeko

# Load the "Stasiun Citeko" sheet, keep only the date and rainfall (RR)
# columns, and drop rows that have no date.
citeko <- local({
  # skip = 8 passes over the first eight rows of the sheet; guess_max = 50000
  # raises the number of rows readxl may inspect when guessing column types.
  sheet_raw <- read_xlsx(
    "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Staisun lain.xlsx",
    sheet = "Stasiun Citeko",
    skip = 8,
    guess_max = 50000
  )

  sheet_raw %>%
    select(Tanggal, RR_C = RR) %>%
    filter(!is.na(Tanggal)) %>%
    mutate(RR_C = as.numeric(RR_C)) # make sure rainfall is numeric
})
# Inspect the result (Tanggal arrives as text, see the <chr> column below)
head(citeko)
## # A tibble: 6 × 2
##   Tanggal  RR_C
##   <chr>   <dbl>
## 1 27395      NA
## 2 27396      NA
## 3 27397      NA
## 4 27398      NA
## 5 27399      NA
## 6 27400      NA
str(citeko)
## tibble [17,545 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Tanggal: chr [1:17545] "27395" "27396" "27397" "27398" ...
##  $ RR_C   : num [1:17545] NA NA NA NA NA NA NA NA NA NA ...

Jabar

# Load the "Stasiun Klimatologi jawa barat" sheet, keep only the date and
# rainfall (RR) columns, and drop rows that have no date.
jabar <- local({
  # skip = 8 passes over the first eight rows of the sheet; guess_max = 50000
  # raises the number of rows readxl may inspect when guessing column types.
  sheet_raw <- read_xlsx(
    "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Staisun lain.xlsx",
    sheet = "Stasiun Klimatologi jawa barat",
    skip = 8,
    guess_max = 50000
  )

  sheet_raw %>%
    select(Tanggal, RR_J = RR) %>%
    filter(!is.na(Tanggal)) %>%
    mutate(RR_J = as.numeric(RR_J)) # make sure rainfall is numeric
})

# Inspect the result (Tanggal arrives as text, see the <chr> column below)
head(jabar)
## # A tibble: 6 × 2
##   Tanggal  RR_J
##   <chr>   <dbl>
## 1 27395      NA
## 2 27396      NA
## 3 27397      NA
## 4 27398      NA
## 5 27399      NA
## 6 27400      NA
str(jabar)
## tibble [17,545 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Tanggal: chr [1:17545] "27395" "27396" "27397" "27398" ...
##  $ RR_J   : num [1:17545] NA NA NA NA NA NA NA NA NA NA ...

Bandung

# Load the Bandung workbook (sheet "1975-2022"), keep only the date and
# rainfall (RR) columns, and drop rows that have no date.
bandung <- local({
  # guess_max = 50000 raises the number of rows readxl may inspect when
  # guessing column types.
  sheet_raw <- read_xlsx(
    "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Data  Suhu dan CH Kota Bandung (1).xlsx",
    sheet = "1975-2022",
    guess_max = 50000
  )

  sheet_raw %>%
    select(Tanggal, RR_B = RR) %>%
    filter(!is.na(Tanggal)) %>%
    mutate(RR_B = as.numeric(RR_B)) # make sure rainfall is numeric
})
## New names:
## • `Tahun` -> `Tahun...5`
## • `` -> `...6`
## • `Tahun` -> `Tahun...7`
# Inspect the result
head(bandung)
## # A tibble: 6 × 2
##   Tanggal              RR_B
##   <dttm>              <dbl>
## 1 1980-01-01 00:00:00   1  
## 2 1980-01-02 00:00:00 -99.9
## 3 1980-01-03 00:00:00 -99.9
## 4 1980-01-04 00:00:00   1  
## 5 1980-01-05 00:00:00 -99.9
## 6 1980-01-06 00:00:00   1

Kertajati

library(dplyr)
library(readr)
library(lubridate) 

# Load the Kertajati daily CSV, keep the date and rainfall (RR) columns,
# and parse the date text as day-month-year.
kertajati <- local({
  # skip = 8 passes over the first eight lines of the file; guess_max = 50000
  # raises the number of rows readr may inspect when guessing column types.
  csv_raw <- read_csv(
    "C:\\Users\\MUTHI'AH IFFA\\Downloads\\Stasiun Meteorologi Kertajati_01-01-1975_31-12-2022.xlsx - Data Harian - Table.csv",
    skip = 8,
    guess_max = 50000
  )

  daily <- csv_raw %>%
    select(Tanggal, RR_KJT = RR) %>%
    mutate(Tanggal = dmy(Tanggal))

  # Rows whose date is missing or failed to parse become NA and are dropped
  # (the original note describes them as malformed rows at the end of the data).
  daily %>%
    filter(!is.na(Tanggal))
})
## Rows: 17548 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Tanggal, ddd_car
## dbl (9): Tn, Tx, Tavg, RH_avg, RR, ss, ff_x, ddd_x, ff_avg
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Tanggal = dmy(Tanggal)`.
## Caused by warning:
## !  13 failed to parse.
# Inspect the result
str(kertajati)
## tibble [17,532 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Tanggal: Date[1:17532], format: "1975-01-01" "1975-01-02" ...
##  $ RR_KJT : num [1:17532] NA NA NA NA NA NA NA NA NA NA ...
head(kertajati)
## # A tibble: 6 × 2
##   Tanggal    RR_KJT
##   <date>      <dbl>
## 1 1975-01-01     NA
## 2 1975-01-02     NA
## 3 1975-01-03     NA
## 4 1975-01-04     NA
## 5 1975-01-05     NA
## 6 1975-01-06     NA

CHIRPS

# Load the ClimateEngine (CHIRPS) export exactly as it is stored on disk.
chirps <- read_csv(
  file = "C:\\Users\\MUTHI'AH IFFA\\Downloads\\ClimateEngine (1).csv"
)
## Rows: 12053 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Tanggal
## dbl (1): RR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dates in this file are month/day/year text (consecutive rows read 1/1/1990,
# 1/2/1990, ...), so they are parsed with mdy(); rainfall is renamed to RR_S.
chirps_bersih <- chirps %>%
  mutate(Tanggal = mdy(Tanggal)) %>%
  select(Tanggal, RR_S = RR)

# Check which dates the parsed series starts with
head(chirps_bersih[["Tanggal"]])
## [1] "1990-01-01" "1990-01-02" "1990-01-03" "1990-01-04" "1990-01-05"
## [6] "1990-01-06"
head(chirps)
## # A tibble: 6 × 2
##   Tanggal     RR
##   <chr>    <dbl>
## 1 1/1/1990 13.2 
## 2 1/2/1990  3.86
## 3 1/3/1990  4.41
## 4 1/4/1990 11.6 
## 5 1/5/1990  3.86
## 6 1/6/1990  9.17

Samakan format data

# 1. Bring every series to a plain Date key and drop rows without a date
chirps_ok <- chirps_bersih %>%
  mutate(across(Tanggal, as.Date)) %>%
  filter(!is.na(Tanggal))

# Bandung dates are read as date-times; reduce them to Date and drop rows
# without a date.
bandung_ok <- bandung %>%
  mutate(across(Tanggal, as.Date)) %>%
  filter(!is.na(Tanggal))

# Citeko dates are text holding day counts; convert them to numbers and then
# to Date using the 1899-12-30 origin. Text that is not a number turns into
# NA (with a coercion warning) and those rows are dropped.
citeko_ok <- citeko %>%
  mutate(
    tgl_num = as.numeric(Tanggal),
    Tanggal = as.Date(tgl_num, origin = "1899-12-30")
  ) %>%
  filter(!is.na(Tanggal)) %>%
  select(Tanggal, RR_C)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `tgl_num = as.numeric(Tanggal)`.
## Caused by warning:
## ! NAs introduced by coercion
# Jabar dates are text holding day counts; convert them to numbers and then
# to Date using the 1899-12-30 origin. Text that is not a number turns into
# NA (with a coercion warning) and those rows are dropped.
jabar_ok <- jabar %>%
  mutate(
    tgl_num = as.numeric(Tanggal),
    Tanggal = as.Date(tgl_num, origin = "1899-12-30")
  ) %>%
  filter(!is.na(Tanggal)) %>%
  select(Tanggal, RR_J)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `tgl_num = as.numeric(Tanggal)`.
## Caused by warning:
## ! NAs introduced by coercion
# Coerce the Kertajati date column with as.Date and drop rows without a date.
kertajati_ok <- kertajati %>%
  mutate(across(Tanggal, as.Date)) %>%
  filter(!is.na(Tanggal))

Gabungkan data

# MERGE STEP
# Inner-join the five series on Tanggal, so only dates present in every
# table survive. Tables are folded left to right, starting from CHIRPS.
data_master_final <- Reduce(
  function(joined, station) inner_join(joined, station, by = "Tanggal"),
  list(chirps_ok, bandung_ok, citeko_ok, jabar_ok, kertajati_ok)
)

# CLEANING STEP: blank out sentinel codes (the original note calls 8888 a
# BMKG data-error code) and values outside the 0-500 range.
data_siap <- data_master_final %>%
  mutate(across(starts_with("RR"), function(rr) {
    rr <- as.numeric(rr) # make sure every RR column is numeric
    # A value is rejected when it is a known code, above 500, or negative.
    flagged <- rr %in% c(8888, 9999, -99, -99.9, 999) | rr > 500 | rr < 0
    ifelse(flagged, NA, rr)
  })) %>%
  # Keep only rows where every RR column has a value, so the regression
  # runs on complete cases.
  filter(if_all(starts_with("RR"), function(rr) !is.na(rr)))
# Check
cat("Jumlah data yang berhasil disatukan:", nrow(data_siap), "hari")
## Jumlah data yang berhasil disatukan: 8792 hari

Regresi

# Regress Bandung rainfall (RR_B) on the CHIRPS (RR_S), Citeko (RR_C),
# Jabar (RR_J) and Kertajati (RR_KJT) series.
# Fit on data_siap, the cleaned table. data_master_final is the raw join and
# still carries sentinel codes such as -99.9 as if they were rainfall
# amounts, which distorts every coefficient.
model_final <- lm(RR_B ~ RR_S + RR_C + RR_J + RR_KJT, data = data_siap)

# Show the fit summary.
# NOTE: the summary transcript recorded after this chunk was produced from
# data_master_final (see its Call line) and must be regenerated.
summary(model_final)
## 
## Call:
## lm(formula = RR_B ~ RR_S + RR_C + RR_J + RR_KJT, data = data_master_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -114.951    0.696    2.023    7.241  164.500 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.0173696  0.3677432  -5.486 4.21e-08 ***
## RR_S         0.3532582  0.0341050  10.358  < 2e-16 ***
## RR_C        -0.0004257  0.0002268  -1.877   0.0606 .  
## RR_J        -0.0003132  0.0001239  -2.527   0.0115 *  
## RR_KJT      -0.0002747  0.0002102  -1.307   0.1913    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.23 on 10543 degrees of freedom
##   (1505 observations deleted due to missingness)
## Multiple R-squared:  0.01109,    Adjusted R-squared:  0.01072 
## F-statistic: 29.56 on 4 and 10543 DF,  p-value: < 2.2e-16