# ============================================
# 1. Import Data
setwd("D:/Kuliah/Lomba/UII Siaga Awards")
library(readxl)
data <- read_excel("banjir_sumatera.xlsx")


# ============================================S
# 2. Load Library
#Package yang diperlukan untuk data spasial
library(tmap)
## Warning: package 'tmap' was built under R version 4.5.2
library(raster)
## Warning: package 'raster' was built under R version 4.5.2
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.5.2
library(spdep)
## Warning: package 'spdep' was built under R version 4.5.2
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.5.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.5.2
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(gdalraster)
## Warning: package 'gdalraster' was built under R version 4.5.2
## GDAL 3.11.4 (released 2025-09-04), GEOS 3.13.1, PROJ 9.7.0
## 
## Attaching package: 'gdalraster'
## The following objects are masked from 'package:raster':
## 
##     calc, rasterize
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
library(sf)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
## The following object is masked from 'package:raster':
## 
##     extract
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:gdalraster':
## 
##     combine
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# ============================================
# 3. Mengambil Data Spasial

# Baca shapefile
Indonesia <- st_read("38provinsi.shp")
## Reading layer `38provinsi' from data source 
##   `D:\Kuliah\Lomba\UII Siaga Awards\38provinsi.shp' using driver `ESRI Shapefile'
## Simple feature collection with 38 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 94.97191 ymin: -11.00762 xmax: 141.02 ymax: 6.076832
## Geodetic CRS:  WGS 84
# Daftar provinsi Pulau Sumatera
prov_sumatera <- c(
  "Aceh",
  "Sumatera Utara",
  "Sumatera Barat",
  "Riau",
  "Kepulauan Riau",
  "Jambi",
  "Bengkulu",
  "Sumatera Selatan",
  "Lampung",
  "Kepulauan Bangka Belitung"
)


# Filter shapefile untuk wilayah barat Indonesia
Indonesia <- Indonesia %>%
  filter(WADMPR %in% prov_sumatera)


unique(Indonesia$WADMPR)
##  [1] "Aceh"                      "Sumatera Utara"           
##  [3] "Sumatera Barat"            "Riau"                     
##  [5] "Jambi"                     "Sumatera Selatan"         
##  [7] "Bengkulu"                  "Lampung"                  
##  [9] "Kepulauan Bangka Belitung" "Kepulauan Riau"
###### ========= plot(st_geometry(Indonesia), main = "Peta Pulau Sumatera")
Indonesia$id <- 1:10
Indonesia$id <- as.character(Indonesia$id)

# ============================================
# 4. Join Data ke Shp

Indonesia <- Indonesia %>%
  left_join(data, by = c("WADMPR" = "Provinsi"))


spProv <- Indonesia %>% 
  select(
    ID = id,
    Provinsi = WADMPR,
    Y = Kejadian,
    X1 = Kepadatan,
    X2 = Hujan,
    X3 = Sungai,
    X4 = Sampah,
    X5 = Deforestasi,
    Geometry = geometry
  )

if (!inherits(spProv, "sf")) {
  spProv <- st_as_sf(spProv)
}

# Proyeksikan ke CRS metrik (contoh: UTM Zone 49S, EPSG:32749)
spProv_utm <- st_transform(spProv, crs = 32749)
colSums(is.na(spProv_utm))
##       ID Provinsi        Y       X1       X2       X3       X4       X5 
##        0        0        0        0        0        0        0        0 
## Geometry 
##        0
# ============================================
# 5. Sebaran Indeks Banjir
library(sp)
library(classInt)
## Warning: package 'classInt' was built under R version 4.5.2
library(RColorBrewer)

# Pastikan data bertipe Spatial* untuk spplot()
spProv_sp <- as(spProv_utm, "Spatial")

# Ambil data nilai
val <- spProv_sp$Y

# Tentukan breaks manual
breaks <- c(2, 14, 46, 61, 120)

# Potong nilai ke dalam kelas
cat <- cut(val,
           breaks = breaks,
           include.lowest = TRUE,
           dig.lab = 5,
           labels = c("Sangat Sedikit","Sedikit", "Banyak", "Sangat Banyak")
)

# Simpan kategori ke data
spProv_sp$Y_cat <- factor(cat, levels =c("Sangat Sedikit","Sedikit", "Banyak", "Sangat Banyak"))

# Warna gradasi (Rendah = hijau, Sedang = kuning, Tinggi = merah)
colors <- c("#00A300",  # Hijau
            "#FFFF00",  # Kuning
            "#FFA500",  # Jingga
            "#B22222")  # Merah


# Tampilkan peta
print(
  spplot(spProv_sp, zcol = "Y_cat",
         main = "Peta Choropleth: Variabel Y",
         col.regions = colors,
         par.settings = list(
           layout.heights = list(key.bottom = 2),
           axis.line = list(col = NA)
         ),
         colorkey = list(
           space = "right",
           labels = list(
             at = c(1, 2, 3, 4),
             labels = c("Sangat Sedikit","Sedikit", "Banyak", "Sangat Banyak"),
             cex = 1.1
           ),
           width = 2,   # lebar legend
           height = 0.8 # tinggi legend
         ))
)

# ============================================
# 6. Explorasi Data
str(data)
## tibble [10 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi   : chr [1:10] "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
##  $ Kejadian   : num [1:10] 47 120 64 53 44 69 9 28 2 4
##  $ Kepadatan  : num [1:10] 98 215 139 75 76 102 105 281 92 264
##  $ Hujan      : num [1:10] 14 43 10 2 7 9 3 24 3 16
##  $ Sungai     : num [1:10] 360 615 454 379 503 746 210 358 21 48
##  $ Sampah     : num [1:10] 932 1218 803 754 237 ...
##  $ Deforestasi: num [1:10] 11209 7035 6634 29702 8291 ...
summary(data)
##    Provinsi            Kejadian        Kepadatan         Hujan     
##  Length:10          Min.   :  2.00   Min.   : 75.0   Min.   : 2.0  
##  Class :character   1st Qu.: 13.75   1st Qu.: 93.5   1st Qu.: 4.0  
##  Mode  :character   Median : 45.50   Median :103.5   Median : 9.5  
##                     Mean   : 44.00   Mean   :144.7   Mean   :13.1  
##                     3rd Qu.: 61.25   3rd Qu.:196.0   3rd Qu.:15.5  
##                     Max.   :120.00   Max.   :281.0   Max.   :43.0  
##      Sungai          Sampah        Deforestasi     
##  Min.   : 21.0   Min.   : 146.3   Min.   :  213.2  
##  1st Qu.:247.0   1st Qu.: 178.3   1st Qu.: 4238.9  
##  Median :369.5   Median : 684.0   Median : 6149.2  
##  Mean   :369.4   Mean   : 602.6   Mean   : 7803.0  
##  3rd Qu.:490.8   3rd Qu.: 899.4   3rd Qu.: 7976.6  
##  Max.   :746.0   Max.   :1218.4   Max.   :29702.1
colSums(is.na(data))
##    Provinsi    Kejadian   Kepadatan       Hujan      Sungai      Sampah 
##           0           0           0           0           0           0 
## Deforestasi 
##           0
# Visualisasi distribusi tiap variabel
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
data_long <- melt(data)
## Using Provinsi as id variables
ggplot(data_long, aes(x = value)) + 
  facet_wrap(~variable, scales = "free") +
  geom_histogram(bins = 30, fill = "steelblue", color = "white") +
  theme_minimal()

# Korelasi antar variabel numerik
datacor <- data %>% 
  select(
    Y = Kejadian,
    X1 = Kepadatan,
    X2 = Hujan,
    X3 = Sungai,
    X4 = Sampah,
    X5 = Deforestasi,
  )
cor_matrix <- cor(datacor[, c("Y", "X1", "X2", "X3", "X4","X5")])
print(cor_matrix)
##              Y           X1         X2         X3        X4         X5
## Y  1.000000000  0.001720627  0.6134465  0.8426127 0.7620081  0.2353831
## X1 0.001720627  1.000000000  0.6999241 -0.1465994 0.2962650 -0.5547698
## X2 0.613446469  0.699924062  1.0000000  0.3341439 0.6801967 -0.3170264
## X3 0.842612705 -0.146599380  0.3341439  1.0000000 0.5863719  0.1275042
## X4 0.762008123  0.296265040  0.6801967  0.5863719 1.0000000  0.1990301
## X5 0.235383150 -0.554769786 -0.3170264  0.1275042 0.1990301  1.0000000
# Visualisasi korelasi
library(corrplot)
## corrplot 0.95 loaded
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.75,
  tl.cex = 0.9,
  tl.col = "gray15",
  tl.srt = 0,
  col = colorRampPalette(c("#2166AC", "#FFFFFF", "#B2182B"))(200),
  outline = TRUE,
  addgrid.col = "gray80"
)

library(dplyr)

summary_stats <- datacor %>%
  dplyr::select("Y", "X1", "X2", "X3", "X4","X5") %>%
  summarise_all(list(
    Min = ~min(.),
    Q1 = ~quantile(., 0.25),
    Median = ~median(.),
    Mean = ~mean(.),
    Q3 = ~quantile(., 0.75),
    Max = ~max(.),
    SD = ~sd(.)
  )) %>%
  pivot_longer(everything(),
               names_to = c("Variable", "Statistic"),
               names_sep = "_") %>%
  pivot_wider(names_from = Statistic, values_from = value)

# Hitung korelasi masing-masing variabel ke Y
# Karena korelasi Y ke Y = 1, variabel lain korelasi ke Y dihitung
variabel <- c("Y", "X1", "X2", "X3", "X4","X5")
korelasi_ke_Y <- sapply(variabel, function(v) cor(datacor[[v]], datacor$Y))

# Buat data frame korelasi agar bisa digabung
df_korelasi <- data.frame(Variable = variabel,
                          Korelasi_ke_Y = korelasi_ke_Y)

# Gabungkan summary dengan korelasi
summary_final <- summary_stats %>%
  left_join(df_korelasi, by = "Variable")

kable(summary_final)
Variable Min Q1 Median Mean Q3 Max SD Korelasi_ke_Y
Y 2.0000 13.750 45.500 44.0000 61.2500 120.000 36.11094 1.0000000
X1 75.0000 93.500 103.500 144.7000 196.0000 281.000 78.68368 0.0017206
X2 2.0000 4.000 9.500 13.1000 15.5000 43.000 12.52952 0.6134465
X3 21.0000 247.000 369.500 369.4000 490.7500 746.000 230.35055 0.8426127
X4 146.2964 178.252 683.966 602.5841 899.3931 1218.385 401.41449 0.7620081
X5 213.1641 4238.945 6149.155 7803.0490 7976.6441 29702.085 8389.51989 0.2353831
# ============================================
# 7. OLS Model
wr_formula <- Y ~ X1 + X2 + X3 + X4 + X5
# Model OLS
ols_model <- lm(wr_formula, data = spProv_utm)

# Ringkasan hasil regresi
summary(ols_model)
## 
## Call:
## lm(formula = wr_formula, data = spProv_utm)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
##  -8.1923   4.4885  19.3063  -0.1267  -7.7621   0.1369  -3.1898 -12.1154 
##        9       10 
##   2.8427   4.6118 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.9811898 19.1046336  -0.104   0.9224  
## X1          -0.1346840  0.1088165  -1.238   0.2835  
## X2           2.0206381  0.7364672   2.744   0.0517 .
## X3           0.0823605  0.0287119   2.869   0.0455 *
## X4           0.0017466  0.0218248   0.080   0.9401  
## X5           0.0009641  0.0007900   1.220   0.2893  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.29 on 4 degrees of freedom
## Multiple R-squared:  0.9398, Adjusted R-squared:  0.8645 
## F-statistic: 12.49 on 5 and 4 DF,  p-value: 0.01491
# Library
library(car)

# Variance Inflation Factor
vif(ols_model)
##       X1       X2       X3       X4       X5 
## 3.735047 4.338233 2.228643 3.910410 2.237781
# Library
library(lmtest)

# Uji Breusch-Pagan
bptest(ols_model)
## 
##  studentized Breusch-Pagan test
## 
## data:  ols_model
## BP = 4.1149, df = 5, p-value = 0.533
# Histogram residual
hist(residuals(ols_model), main = "Histogram Residual", xlab = "Residuals")

library(tseries)
jarque.bera.test(residuals(ols_model))
## 
##  Jarque Bera Test
## 
## data:  residuals(ols_model)
## X-squared = 1.0567, df = 2, p-value = 0.5896
# ============================================
# 8. Geographically Weighted Regression (GWR)
# a. Persiapan Data GWR
library(spgwr)
## Warning: package 'spgwr' was built under R version 4.5.2
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
spProv_utm <- st_as_sf(spProv_utm)
# b. Ubah ke lon-lat (WGS84)
gabung_final<-spProv_utm
gabung_final_ll <- st_transform(spProv_utm, crs = 4326)

# c. Validasi geometri
gabung_final_ll <- st_make_valid(gabung_final_ll)

# d. Hitung centroid dan ekstrak koordinat
centroid <- st_centroid(gabung_final_ll)
## Warning: st_centroid assumes attributes are constant over geometries
coords <- st_coordinates(centroid)

# e. Tambah koordinat ke data
gabung_final_ll$lon_centroid <- coords[, 1]
gabung_final_ll$lat_centroid <- coords[, 2]

# f. Buat salinan data.frame
# Tambahkan ID ke data yang akan dikonversi
gabung_df <- st_drop_geometry(gabung_final_ll)
gabung_df$id <- gabung_final_ll$id


# g. Konversi ke SpatialPointsDataFrame
coordinates(gabung_df) <- ~lon_centroid + lat_centroid
proj4string(gabung_df) <- CRS("+proj=longlat +datum=WGS84")

# h. Ini adalah objek final untuk spgwr
gabung_sp <- gabung_df


# ============================================
# 9. Pemilihan Pembobot dan Bandwith Terbaik
library(tibble)

# Formula tetap
gwr_formula <-  Y ~ X1 + X2 + X3 + X4 + X5
# Model OLS
y_actual <- gabung_sp$Y

# Fungsi menangkap ringkasan GWR
capture_gwr_summary <- function(model, y_actual, formula) {
  printed <- capture.output(print(model))
  ambil <- function(kunci) {
    baris <- grep(kunci, printed, value = TRUE)
    if (length(baris) == 0) return(NA_real_)
    as.numeric(sub(".*:\\s*", "", baris))
  }
  aic_val  <- ambil("^AIC \\(GWR")
  aicc_val <- ambil("^AICc")
  rss      <- ambil("^Residual sum of squares")
  r2       <- ambil("^Quasi-global R2")
  tr_S     <- model$results$edf
  n <- length(y_actual)
  k <- length(all.vars(formula)) - 1
  adj_r2 <- 1 - ((1 - r2) * (n - 1) / (n - k - 1))
  tibble(AIC = aic_val, AICc = aicc_val, RSS = rss, R2 = r2, Adjusted_R2 = adj_r2, traceS = tr_S)
}

# Daftar kernel dan tipe bandwidth
kernels <- c("gaussian", "bisquare")
bandwidth_types <- c("adaptive", "fixed")

# Simpan hasil GWR
result_list <- list()

for (kernel in kernels) {
  for (bw_type in bandwidth_types) {
    
    # Pilih fungsi bobot
    gweight_fun <- if (kernel == "gaussian") gwr.Gauss else gwr.bisquare
    
    # Pilih bandwidth
    bw <- gwr.sel(
      formula = gwr_formula,
      data = gabung_sp,
      gweight = gweight_fun,
      adapt = (bw_type == "adaptive"),
      longlat = TRUE,
      verbose = FALSE
    )
    
    # Jalankan GWR
    model <- gwr(
      formula = gwr_formula,
      data = gabung_sp,
      gweight = gweight_fun,
      hatmatrix = TRUE,
      se.fit = TRUE,
      longlat = TRUE,
      adapt = if (bw_type == "adaptive") bw else NULL,
      bandwidth = if (bw_type == "fixed") bw else NULL
    )
    
    # Ringkasan hasil
    summary_tbl <- capture_gwr_summary(model, y_actual, gwr_formula)
    summary_tbl$Kernel <- kernel
    summary_tbl$Bandwidth <- bw_type
    result_list[[length(result_list)+1]] <- summary_tbl
  }
}
## Warning in gwr.sel(formula = gwr_formula, data = gabung_sp, gweight =
## gweight_fun, : Bandwidth converged to upper bound:1
## Warning in gwr.sel(formula = gwr_formula, data = gabung_sp, gweight =
## gweight_fun, : Bandwidth converged to upper bound:1473.69459375554
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/NaN replaced by maximum positive value
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/NaN replaced by maximum positive value
## Warning in optimize(gwr.cv.f, lower = beta1, upper = beta2, maximum = FALSE, :
## NA/NaN replaced by maximum positive value
## Warning in optimize(gwr.cv.f, lower = beta1, upper = beta2, maximum = FALSE, :
## NA/NaN replaced by maximum positive value
# Tambahkan model OLS
ols_model <- lm(gwr_formula, data = gabung_sp)
ols_rss <- sum(resid(ols_model)^2)
ols_tss <- sum((y_actual - mean(y_actual))^2)
ols_r2 <- 1 - (ols_rss / ols_tss)
n <- nrow(gabung_sp)
k <- length(all.vars(gwr_formula)) - 1
ols_adj_r2 <- 1 - ((1 - ols_r2) * (n - 1) / (n - k - 1))
ols_aic <- AIC(ols_model)
ols_aicc <- ols_aic + (2 * k * (k + 1)) / (n - k - 1)

ols_summary <- tibble(
  AIC = ols_aic,
  AICc = ols_aicc,
  RSS = ols_rss,
  R2 = ols_r2,
  Adjusted_R2 = ols_adj_r2,
  traceS = NA_real_,
  Kernel = "OLS",
  Bandwidth = "NA"
)

# Gabungkan semua hasil
final_results <- bind_rows(result_list, ols_summary)

# Tampilkan tabel
kable(final_results)
AIC AICc RSS R2 Adjusted_R2 traceS Kernel Bandwidth
75.05695 158.60800 563.500600 0.9519853 0.8919669 3.3322150 gaussian adaptive
76.39625 150.27910 654.702600 0.9442142 0.8744820 3.6191817 gaussian fixed
36.85328 -114.50500 9.046264 0.9992292 0.9982657 0.2253451 bisquare adaptive
70.30317 398.83460 312.806100 0.9733464 0.9400294 2.0199195 bisquare fixed
84.95738 99.95738 706.586553 0.9397932 0.8645348 NA OLS NA
# ============================================
# 10. Model GWR Terbaik

library(spgwr)

# a. Definisikan formula GWR
gwr_formula <-  Y ~ X1 + X2 + X3 + X4 + X5
# Model OLS

# b. Pilih kernel dan tipe bandwidth
kernel <- "bisquare"
bw_type <- "adaptive"

# c. Pilih fungsi bobot
gweight_fun <- gwr.bisquare

# d. Cari bandwidth adaptif
bw_adaptive <- gwr.sel(
  formula = gwr_formula,
  data = gabung_sp,
  gweight = gweight_fun,
  adapt = TRUE,
  longlat = TRUE
)
## Adaptive q: 0.381966 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/NaN replaced by maximum positive value
## Adaptive q: 0.618034 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/NaN replaced by maximum positive value
## Adaptive q: 0.763932 CV score: 12984.35 
## Adaptive q: 0.854102 CV score: 8420.557 
## Adaptive q: 0.809017 CV score: 9385.429 
## Adaptive q: 0.8480744 CV score: 8379.593 
## Adaptive q: 0.8463813 CV score: 8369.457 
## Adaptive q: 0.8321094 CV score: 8361.821 
## Adaptive q: 0.838462 CV score: 8340.935 
## Adaptive q: 0.8386912 CV score: 8341.112 
## Adaptive q: 0.8379496 CV score: 8340.728 
## Adaptive q: 0.8357189 CV score: 8343.227 
## Adaptive q: 0.8378015 CV score: 8340.719 
## Adaptive q: 0.8378422 CV score: 8340.719 
## Adaptive q: 0.8377608 CV score: 8340.72 
## Adaptive q: 0.8378015 CV score: 8340.719
# e. Jalankan GWR dengan bandwidth adaptif
gwr_adaptive_model <- gwr(
  formula = gwr_formula,
  data = gabung_sp,
  gweight = gweight_fun,
  adapt = bw_adaptive,
  hatmatrix = TRUE,
  se.fit = TRUE,
  longlat = TRUE
)

# f. Lihat hasil
gwr_adaptive_model
## Call:
## gwr(formula = gwr_formula, data = gabung_sp, gweight = gweight_fun, 
##     adapt = bw_adaptive, hatmatrix = TRUE, longlat = TRUE, se.fit = TRUE)
## Kernel function: gweight_fun 
## Adaptive quantile: 0.8378015 (about 8 of 10 data points)
## Summary of GWR coefficient estimates at data points:
##                     Min.     1st Qu.      Median     3rd Qu.        Max.
## X.Intercept. -6.1102e+01 -6.9259e+00  7.4333e-01  4.5019e+00  3.5896e+01
## X1           -4.4230e-01 -2.2686e-01 -1.0278e-01  6.9459e-03  2.0204e-01
## X2           -3.9384e-01  2.2183e-01  7.0936e-01  1.3324e+00  3.6778e+00
## X3            3.6323e-02  4.5771e-02  7.0202e-02  9.0157e-02  1.5436e-01
## X4            3.9717e-03  2.4265e-02  3.5758e-02  4.7529e-02  7.0708e-02
## X5           -6.0083e-04  1.4575e-05  3.6623e-04  6.3819e-04  7.3432e-04
##               Global
## X.Intercept. -1.9812
## X1           -0.1347
## X2            2.0206
## X3            0.0824
## X4            0.0017
## X5            0.0010
## Number of data points: 10 
## Effective number of parameters (residual: 2traceS - traceS'S): 9.774655 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 0.2253451 
## Sigma (residual: 2traceS - traceS'S): 6.335934 
## Effective number of parameters (model: traceS): 9.476845 
## Effective degrees of freedom (model: traceS): 0.5231545 
## Sigma (model: traceS): 4.158337 
## Sigma (ML): 0.9511185 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): -114.505 
## AIC (GWR p. 96, eq. 4.22): 36.85328 
## Residual sum of squares: 9.046264 
## Quasi-global R2: 0.9992292
# ============================================
# 11. Uji Nova
# Ambil RSS (Residual Sum of Squares) dari model OLS
ols_model
## 
## Call:
## lm(formula = gwr_formula, data = gabung_sp)
## 
## Coefficients:
## (Intercept)           X1           X2           X3           X4           X5  
##  -1.9811898   -0.1346840    2.0206381    0.0823605    0.0017466    0.0009641
RSS_OLS <- sum(residuals(ols_model)^2)
DF_OLS <- df.residual(ols_model)

# Ambil RSS dan DOF dari GWR
RSS_GWR <- sum(gwr_adaptive_model$SDF$gwr.e^2)
EDF_GWR <- gwr_adaptive_model$results$edf  # Effective Degrees of Freedom
DF_GWR <- EDF_GWR                 # Sesuai definisi DOF residual GWR

# Hitung peningkatan penyesuaian model
Improvement <- RSS_OLS - RSS_GWR
DF_Improvement <- DF_OLS - DF_GWR

# Mean Square (MS)
MS_Improvement <- Improvement / DF_Improvement
MS_GWR <- RSS_GWR / DF_GWR

# F-Hitung
F_hit <- MS_Improvement / MS_GWR

# Tabel ANOVA manual GWR
anova_manual <- data.frame(
  Sumber = c("Global Residuals (OLS)", "GWR Improvement", "GWR Residuals"),
  SS = c(RSS_OLS, Improvement, RSS_GWR),
  DF = c(DF_OLS, DF_Improvement, DF_GWR),
  MS = c(NA, MS_Improvement, MS_GWR),
  F = c(NA, NA, F_hit)
)

# Hitung p-value berdasarkan F-hitung
p_value <- 1 - pf(F_hit, DF_Improvement, DF_GWR)

# Tambahkan p-value ke tabel ANOVA
anova_manual$`p-value` <- c(NA, NA, p_value)

print(anova_manual)
##                   Sumber         SS        DF        MS        F   p-value
## 1 Global Residuals (OLS) 706.586553 4.0000000        NA       NA        NA
## 2        GWR Improvement 697.540289 3.7746549 184.79578       NA        NA
## 3          GWR Residuals   9.046264 0.2253451  40.14405 4.603316 0.6746115
kable(anova_manual)
Sumber SS DF MS F p-value
Global Residuals (OLS) 706.586553 4.0000000 NA NA NA
GWR Improvement 697.540289 3.7746549 184.79578 NA NA
GWR Residuals 9.046264 0.2253451 40.14405 4.603316 0.6746115
anova(gwr_adaptive_model,ols_model)
## Analysis of Variance Table 
##                      Df Sum Sq Mean Sq F value
## OLS Residuals   6.00000 706.59                
## GWR Improvement 3.77465 697.54 184.796        
## GWR Residuals   0.22535   9.05  40.144  4.6033
qf(1-0.05,DF_Improvement,DF_GWR)
## [1] 50455987949
qf(0.05,DF_Improvement,DF_GWR)
## [1] 0.1092679
# ============================================
# 12. Visualiasasi dan Tabel
library(sf)
library(dplyr)

# 1. Data GWR dari SDF, tambahkan idkab dari spKab_utm (asumsi urutannya cocok)
gwr_sf <- st_as_sf(gwr_adaptive_model$SDF)
gwr_sf$ID <- spProv_utm$ID  # <- pastikan ini cocok!

# 2. Ambil geometri asli dari spKab_utm (pastikan ini adalah polygon, bukan titik!)nmkab
geom_ref <- spProv_utm %>% dplyr::select(ID, Provinsi, Geometry)

# 3. Gabungkan: gunakan data dari GWR (gwr_sf) dan ambil geometri dari geom_ref
gwr_joined <- left_join(st_drop_geometry(gwr_sf), geom_ref, by = "ID") %>%
  st_as_sf()

# 4. Visualisasi: geometri harus polygon, bukan titik!
plot(st_geometry(gwr_joined))  # Cek, harus terlihat area, bukan titik

# 5. Jika perlu ubah ke Spatial untuk spplot:
gwr_sp <- as(gwr_joined, "Spatial")

# ============================================
# 12. Uji Signifikansi Parsial Lokal

vars <- c("X1", "X2", "X3", "X4","X5")



for (v in vars) {
  # Koefisien dan standar error
  coef_val <- gwr_sp@data[[v]]
  se_val <- gwr_sp@data[[paste0(v, "_se")]]
  t_val <- coef_val / se_val
  
  # 1. Simpan koefisien
  gwr_sp@data[[paste0("b_", v)]] <- coef_val
  
  # 2. Simpan status signifikan
  signif_flag <- ifelse(abs(t_val) > 1.96, "Ya", "Tidak")
  gwr_sp@data[[paste0("sig_", v)]] <- factor(signif_flag, levels = c("Tidak", "Ya"))
  
  # 3. Simpan koefisien hanya signifikan
  gwr_sp@data[[paste0("b_", v, "_sig")]] <- ifelse(abs(t_val) > 1.96, coef_val, NA)

  # --- Peta 1: Koefisien
  print(spplot(gwr_sp, zcol = paste0("b_", v),
               main = paste("Peta Koefisien", v),
               col.regions = colorRampPalette(c("blue", "white", "red"))(100),
               na.col = "grey"))
  
  # --- Peta 2: Signifikansi
  print(spplot(gwr_sp, zcol = paste0("sig_", v),
               main = paste("Signifikansi", v),
               col.regions = c("black", "red")))
  
  # --- Peta 3: Koefisien Hanya Signifikan
  print(spplot(gwr_sp, zcol = paste0("b_", v, "_sig"),
               main = paste("Koefisien Signifikan", v),
               col.regions = colorRampPalette(c("blue", "green", "red"))(100),
               na.col = "black"))
}

# Hitung jumlah kabupaten/kota yang signifikan (bernilai "Ya")
sig_counts <- sapply(vars, function(v) {
  kolom_nama <- paste0("sig_", v)
  sum(gwr_sp@data[[kolom_nama]] == "Ya", na.rm = TRUE)
})

# Buat data frame hasil
tabel7 <- data.frame(
  `Variabel Independen` = vars,
  `Jumlah Provinsi Signifikan` = sig_counts
)

# Tampilkan tabel
kable(tabel7)
Variabel.Independen Jumlah.Provinsi.Signifikan
X1 X1 4
X2 X2 3
X3 X3 9
X4 X4 9
X5 X5 3
# ============================================
# 13. Visualisasi Final Koefisien Signifikan
library(RColorBrewer)
library(classInt)
library(sp)

for (v in vars) {
  val <- gwr_sp@data[[paste0("b_", v, "_sig")]]
  
  # Inisialisasi vektor kategori NA
  cat_val <- rep(NA_character_, length(val))
  
  label_list <- c()
  color_list <- c()
  
  # --- NEGATIF ---
  val_neg <- val[val < 0 & !is.na(val)]
  if (length(val_neg) >= 3) {
    brk_neg <- classIntervals(val_neg, n = 3, style = "quantile")$brks
    neg_cuts <- cut(val, breaks = brk_neg, include.lowest = TRUE, dig.lab = 2)
    neg_lvls <- levels(neg_cuts)
    
    # Format label
    labels_neg <- gsub(",", ";", neg_lvls)
    labels_neg <- gsub("\\.", ",", labels_neg)
    
    cat_val[val < 0 & !is.na(val)] <- labels_neg[as.numeric(neg_cuts[val < 0 & !is.na(val)])]
    
    label_list <- c(label_list, labels_neg)
    color_list <- c(color_list, c("#8B0000", "#B22222", "#CD5C5C")[1:length(labels_neg)])
  }
  
  # --- POSITIF ---
  val_pos <- val[val > 0 & !is.na(val)]
  if (length(val_pos) >= 3) {
    brk_pos <- classIntervals(val_pos, n = 3, style = "quantile")$brks
    pos_cuts <- cut(val, breaks = brk_pos, include.lowest = TRUE, dig.lab = 2)
    pos_lvls <- levels(pos_cuts)
    
    # Format label
    labels_pos <- gsub(",", ";", pos_lvls)
    labels_pos <- gsub("\\.", ",", labels_pos)
    
    cat_val[val > 0 & !is.na(val)] <- labels_pos[as.numeric(pos_cuts[val > 0 & !is.na(val)])]
    
    label_list <- c(label_list, labels_pos)
    color_list <- c(color_list, c("#87CEFA", "#4682B4", "#00008B")[1:length(labels_pos)])
  }
  
  # --- Tidak Signifikan (NA) ---
  cat_val[is.na(cat_val)] <- "Tidak Signifikan"
  label_list <- c(label_list, "Tidak Signifikan")
  color_list <- c(color_list, "black")
  
  # Faktor berurutan
  cat_val <- factor(cat_val, levels = label_list)
  
  # Tambahkan ke data
  gwr_sp@data[[paste0("b_", v, "_sig_cat")]] <- cat_val
  
  # Plot
  print(
    spplot(gwr_sp, zcol = paste0("b_", v, "_sig_cat"),
           main = paste("Koefisien Signifikan", v),
           col.regions = setNames(color_list, label_list),
           par.settings = list(layout.heights = list(key.bottom = 2), axis.line = list(col = NA)),
           colorkey = list(
             space = "bottom",
             labels = list(
               at = 1:length(label_list),
               labels = label_list,
               cex = 1
             ),
             height = 1
           ))
  )
  
}

## Warning in classIntervals(val_pos, n = 3, style = "quantile"): n same as number
## of different finite values\neach different finite value is a separate class

## Warning in classIntervals(val_pos, n = 3, style = "quantile"): n same as number
## of different finite values\neach different finite value is a separate class

# ============================================
# 14. Pengelompokkan Nilai Koefisien dan Signifikansi Parsial
# Daftar variabel yang dianalisis (misalnya sudah didefinisikan sebelumnya)
vars <- c("X1", "X2", "X3", "X4","X5")  # ganti sesuai variabelmu
# Inisialisasi hasil
hasil_tabel <- data.frame(Koef = c("Positif", "Negatif"), stringsAsFactors = FALSE)

# Loop per variabel (misalnya X1 sampai Xn)
for (v in vars) {
  kolom_nama <- paste0("b_", v, "_sig")  # Kolom berisi koefisien signifikan
  
  # Pastikan kolom ada
  if (!kolom_nama %in% names(gwr_sp@data)) next
  
  val <- gwr_sp@data[[kolom_nama]]
  nama <- gwr_sp@data$Provinsi  # atau NAMOBJ jika itu nama wilayah
  
  # Filter yang signifikan dan positif/negatif
  positif <- nama[which(val > 0 & !is.na(val))]
  negatif <- nama[which(val < 0 & !is.na(val))]
  
  # Gabungkan nama kecamatan
  hasil_tabel[[v]] <- c(
    if (length(positif) > 0) paste(sort(positif), collapse = ", ") else "-",
    if (length(negatif) > 0) paste(sort(negatif), collapse = ", ") else "-"
  )
}
# Tampilkan hasil
kable(hasil_tabel, row.names = FALSE)
Koef X1 X2 X3 X4 X5
Positif Aceh, Sumatera Utara Kepulauan Riau, Riau, Sumatera Barat Aceh, Bengkulu, Jambi, Kepulauan Bangka Belitung, Lampung, Riau, Sumatera Barat, Sumatera Selatan, Sumatera Utara Aceh, Bengkulu, Jambi, Kepulauan Riau, Lampung, Riau, Sumatera Barat, Sumatera Selatan, Sumatera Utara Aceh, Kepulauan Bangka Belitung, Sumatera Utara
Negatif Bengkulu, Jambi - - - -
# ============================================
# 15. Tabel Jumlah Kabupaten dengan Kelompok Kombinasi Signifikansi Parsial

# --- Ambil nilai signifikan dari gwr_sp
sig_df <- gwr_sp@data[, paste0("sig_", vars)]
# --- Pastikan hasilnya logical
sig_df <- lapply(sig_df, function(x) {
  if (is.factor(x)) as.character(x) == "Ya" else as.logical(x)
})
sig_df <- as.data.frame(sig_df)
# --- Buat kolom var_sig langsung di @data
gwr_sp@data$var_sig <- apply(sig_df, 1, function(row) {
  signif_vars <- vars[which(row)]
  if (length(signif_vars) == 0) {
    return("Tidak ada")
  } else {
    return(paste(sort(signif_vars), collapse = ", "))
  }
})
# --- Rekap: nama kecamatan per kategori var_sig
tabel_kec_per_kategori <- gwr_sp@data %>%
  group_by(var_sig) %>%
  summarise(
    daftar_provinsi = paste(sort(unique(Provinsi)), collapse = ", "),
    jumlah = dplyr::n()
  ) %>%
  arrange(var_sig)
# --- Tampilkan semua hasil
kable(tabel_kec_per_kategori, n = Inf)
var_sig daftar_provinsi jumlah
X1, X3, X4 Bengkulu, Jambi 2
X1, X3, X4, X5 Aceh, Sumatera Utara 2
X2, X3, X4 Riau, Sumatera Barat 2
X2, X4 Kepulauan Riau 1
X3, X4 Lampung, Sumatera Selatan 2
X3, X5 Kepulauan Bangka Belitung 1
# ============================================
# 16. Visualisasi Kelompok Kombinasi Signifikansi Parsial
# melihat urutan unik saat ini
library(ggplot2)
library(sf)
gwr_sf <- st_as_sf(gwr_sp)
gwr_sf$var_sig <- factor(gwr_sf$var_sig)

# Ubah ke sf jika belum
# Simpan urutan lama
old_levels <- levels(gwr_sf$var_sig)

# Tukar level 1 dan 2
new_levels <- old_levels
new_levels[c(1,2)] <- new_levels[c(2,1)]

# Terapkan level baru
gwr_sf$var_sig <- factor(gwr_sf$var_sig, levels = new_levels)

warna_kategori_final <- c(
  "#8B0000",  # oranye
  "#3CB371",  # hijau mint
  "#4169E1",  # biru lembut
  "#e78ac3",  # pink
  "#a6d854",  # hijau muda
  "#ffd92f"   # kuning
)



# Buat warna sesuai urutan faktor
warna_kategori_named <- setNames(warna_kategori_final, levels(gwr_sf$var_sig))
# Plot
ggplot(gwr_sf) +
  geom_sf(aes(fill = var_sig), color = "black", size = 0.2) +
  scale_fill_manual(
    values = warna_kategori_named,
    name = "Signifikansi Variabel Lokal",
    guide = guide_legend(
      title.position = "top",
      label.position = "right"  # label di kanan, default menurun
    )
  ) +
  labs(title = "Peta Choropleth: Kelompok Kombinasi Variabel Signifikan (GWR)") +
  coord_sf(datum = NA) +  # Hilangkan label koordinat dan garis sumbu
  theme_void() +          # Hilangkan seluruh grid & axis
  theme(
    legend.position = "right",  # ubah posisi legend ke kanan
    legend.title.align = 0.5,
    legend.text = element_text(size = 13),
    legend.key.width = unit(1.7, "cm"),  # Lebar kotak warna legend
    legend.key.height = unit(0.8, "cm"), # Tinggi kotak warna legend (opsional
    plot.title = element_text(hjust = 0.5, face = "bold", size = 18)
  )