# library
suppressPackageStartupMessages({
  library(dplyr) # for "glimpse"
  library(ggplot2)
  library(scales) # for "comma"
  library(magrittr)
  library(sf)
  library(ncdf4)
  library(tidyverse)
  library(reticulate)
  library(gstat)
  library(sp)
  library(raster)
  library(Metrics)
  library(gganimate)
  library(spdep)
  library(spatialreg)
  library(olsrr)
  library(car)
  library(corrplot)
  library(lmtest)
  library(tmap)
  library(tmaptools)
})
## Warning: package 'reticulate' was built under R version 4.4.2
## Warning: package 'gstat' was built under R version 4.4.2
## Warning: package 'raster' was built under R version 4.4.2
## Warning: package 'gganimate' was built under R version 4.4.2
## Warning: package 'spdep' was built under R version 4.4.2
## Warning: package 'spatialreg' was built under R version 4.4.2
## Warning: package 'Matrix' was built under R version 4.4.2
## Warning: package 'olsrr' was built under R version 4.4.2
# Set working directory
setwd("C:/Users/suhaila/Downloads")
# Set working directory
# Load data
data <- read.csv("Data UAS - Sheet1 (1).csv", dec=",")
kaltim <- read_sf("Kalimantan_Timur_ADMIN_BPS.shp")
kaltim <- as_Spatial(kaltim)
class(kaltim)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(slot(kaltim,"data"))
## 'data.frame':    10 obs. of  6 variables:
##  $ ADM0_EN  : chr  "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
##  $ date     : Date, format: "2019-12-20" "2019-12-20" ...
##  $ validOn  : Date, format: "2020-04-01" "2020-04-01" ...
##  $ PROVINCE : chr  "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" ...
##  $ Kabupaten: chr  "Berau" "Kota Balikpapan" "Kota Bontang" "Kota Samarinda" ...
##  $ PRV2     : chr  "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" ...
# Hilangkan tanda titik dan ubah koma menjadi titik (jika ada)
data$PDRB <- gsub("\\.", "", data$PDRB)  # Hapus tanda titik
data$PDRB <- gsub(",", ".", data$PDRB)  # Ganti koma dengan titik

# Konversi ke numerik
data$PDRB <- as.numeric(data$PDRB)

# Cek hasilnya
str(data$PDRB)
##  num [1:10] 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
# Coordinate
IDs <- kaltim$Kabupaten
coords<-coordinates(kaltim)

# Checking missing value
colSums(is.na(data))
##                Kabupaten.Kota                         Tahun 
##                             0                             0 
##                           TPT                          TPAK 
##                             0                             0 
##                           IPM        Jumlah.Penduduk.Miskin 
##                             0                             0 
##   Indeks.kemahalan.konstruksi            Umur.Harapan.Hidup 
##                             0                             0 
##        Rata.Rata.Lama.Sekolah     Laju.Pertumbuhan.Penduduk 
##                             0                             0 
##                   Indeks.Gini                          PDRB 
##                             0                             0 
## Realisasi.Modal.Investasi.PMA 
##                             0
str(data)
## 'data.frame':    10 obs. of  13 variables:
##  $ Kabupaten.Kota               : chr  "Balikpapan" "Berau" "Bontang " "Kutai Barat " ...
##  $ Tahun                        : int  2023 2023 2023 2023 2023 2023 2023 2023 2023 2023
##  $ TPT                          : num  6.09 4.95 7.74 6.16 4.05 5.93 2.09 4.72 2.07 5.92
##  $ TPAK                         : num  63.5 66.9 68.3 70.5 65.3 ...
##  $ IPM                          : num  82 76.7 81.6 74 76 ...
##  $ Jumlah.Penduduk.Miskin       : num  14.99 13.26 7.71 14.69 60.86 ...
##  $ Indeks.kemahalan.konstruksi  : num  111 113 114 121 110 ...
##  $ Umur.Harapan.Hidup           : num  74.9 72.4 74.7 73.2 72.8 ...
##  $ Rata.Rata.Lama.Sekolah       : num  10.93 9.56 10.92 8.85 9.26 ...
##  $ Laju.Pertumbuhan.Penduduk    : num  1.14 1.48 1.39 1.01 1.35 1.73 1.39 1.13 3.73 0.99
##  $ Indeks.Gini                  : num  0.323 0.327 0.299 0.277 0.284 0.336 0.33 0.292 0.299 0.323
##  $ PDRB                         : num  3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
##  $ Realisasi.Modal.Investasi.PMA: num  77904 105780 331154 458236 46997 ...
#Define datasets
kaltim@data <- data[,c(-1,-1)]
y <- kaltim$IPM
x1 <- kaltim$TPT
x2 <- kaltim$TPAK
x3 <- kaltim$Realisasi.Modal.Investasi.PMA
x4 <- kaltim$Rata.Rata.Lama.Sekolah
# Linear model OLS
tpt.ols <- lm(y ~ x1 + x2 + x3 + x4)
summary(tpt.ols)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4)
## 
## Residuals:
##         1         2         3         4         5         6         7         8 
## -0.670877  0.164085  0.005478  0.544006  0.083479 -0.047967 -0.782393 -0.946132 
##         9        10 
##  0.998093  0.652228 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.521e+01  7.901e+00   6.988 0.000924 ***
## x1          -4.859e-01  3.932e-01  -1.236 0.271352    
## x2          -2.681e-01  9.083e-02  -2.951 0.031843 *  
## x3           4.241e-06  3.050e-06   1.390 0.223162    
## x4           4.312e+00  6.059e-01   7.117 0.000849 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8615 on 5 degrees of freedom
## Multiple R-squared:  0.9756, Adjusted R-squared:  0.9562 
## F-statistic: 50.07 on 4 and 5 DF,  p-value: 0.0003185
library(car)
# Multicolinearity
vif(tpt.ols)
##       x1       x2       x3       x4 
## 6.201875 1.258235 2.792846 4.369041
library(lmtest)
# Heteroskedasticity
bptest(tpt.ols)
## 
##  studentized Breusch-Pagan test
## 
## data:  tpt.ols
## BP = 6.071, df = 4, p-value = 0.1939
# Normality
e <- residuals(tpt.ols)
shapiro.test(e)
## 
##  Shapiro-Wilk normality test
## 
## data:  e
## W = 0.94929, p-value = 0.6601
#Non-autocorrelation
durbinWatsonTest(tpt.ols)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1130757      1.537943   0.372
##  Alternative hypothesis: rho != 0
#QUEEN

# 1. Matriks Bobot KNN (contoh 3 tetangga terdekat)
k_neighbors1 <- 3
knn_weights1 <- knearneigh(coords, k = k_neighbors1) 
knn_nb1 <- knn2nb(knn_weights1)
knn_listw1 <- nb2listw(knn_nb1, style = "W")  # Menggunakan bobot standar

k_neighbors2 <- 4
knn_weights2 <- knearneigh(coords, k = k_neighbors2) 
## Warning in knearneigh(coords, k = k_neighbors2): k greater than one-third of
## the number of data points
knn_nb2 <- knn2nb(knn_weights2)
knn_listw2 <- nb2listw(knn_nb2, style = "W")  # Menggunakan bobot standar


# 2. Matriks Bobot Queen
queen_nb <- poly2nb(kaltim)
queen_listw <- nb2listw(queen_nb, style = "W")  # Menggunakan bobot standar

# 3. Matriks Bobot Rook
rook_nb <- poly2nb(kaltim, queen = FALSE)
rook_listw <- nb2listw(rook_nb, style = "W")  # Menggunakan bobot standar

# Load library
library(spdep)

# Misalkan 'variabel_target' adalah nama kolom dari data yang ingin diuji
variabel_target <- data$IPM

# Hitung nilai Moran's I untuk KNN
knn_moran1 <- moran.test(variabel_target, knn_listw1)
print(knn_moran1)
## 
##  Moran I test under randomisation
## 
## data:  variabel_target  
## weights: knn_listw1    
## 
## Moran I statistic standard deviate = -1.6416, p-value = 0.9497
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        -0.4107382        -0.1111111         0.0333124
knn_moran2 <- moran.test(variabel_target, knn_listw2)
print(knn_moran2)
## 
##  Moran I test under randomisation
## 
## data:  variabel_target  
## weights: knn_listw2    
## 
## Moran I statistic standard deviate = -1.2909, p-value = 0.9016
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.29495011       -0.11111111        0.02028143
# Hitung nilai Moran's I untuk Queen
queen_moran <- moran.test(variabel_target, queen_listw)
print(queen_moran)
## 
##  Moran I test under randomisation
## 
## data:  variabel_target  
## weights: queen_listw    
## 
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.53543494       -0.11111111        0.05317398
# Hitung nilai Moran's I untuk Rook
rook_moran <- moran.test(variabel_target, rook_listw)
print(rook_moran)
## 
##  Moran I test under randomisation
## 
## data:  variabel_target  
## weights: rook_listw    
## 
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.53543494       -0.11111111        0.05317398
# Bandingkan hasil Moran's I
moran_results <- data.frame(
  Method = c("KNN 3","KNN 4", "Queen", "Rook"),
  Moran_I = c(knn_moran1$estimate[1],knn_moran2$estimate[1], queen_moran$estimate[1], rook_moran$estimate[1]),
  Expectation = c(knn_moran1$estimate[2],knn_moran2$estimate[2], queen_moran$estimate[2], rook_moran$estimate[2]),
  Variance = c(knn_moran1$estimate[3],knn_moran2$estimate[3], queen_moran$estimate[3], rook_moran$estimate[3]),
  P_Value = c(knn_moran1$p.value, knn_moran2$p.value, queen_moran$p.value, rook_moran$p.value)
)

print(moran_results)
##   Method    Moran_I Expectation   Variance   P_Value
## 1  KNN 3 -0.4107382  -0.1111111 0.03331240 0.9496678
## 2  KNN 4 -0.2949501  -0.1111111 0.02028143 0.9016286
## 3  Queen -0.5354349  -0.1111111 0.05317398 0.9671252
## 4   Rook -0.5354349  -0.1111111 0.05317398 0.9671252
# Spatial weight matrix
library(spdep)
#KNN
W.K <- knn2nb(knearneigh(coords, k=3 ,longlat=TRUE))
WL.K <- nb2listw(W.K,style='W') ; WL.K
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10 
## Number of nonzero links: 30 
## Percentage nonzero weights: 30 
## Average number of links: 3 
## Non-symmetric neighbours list
## 
## Weights style: W 
## Weights constants summary:
##    n  nn S0       S1       S2
## W 10 100 10 5.555556 42.44444
#Queen
W.Q <- poly2nb(kaltim, row.names=IDs, queen=TRUE)
WL.Q <- nb2listw(W.Q,style='W', zero.policy = TRUE) ; WL.Q
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10 
## Number of nonzero links: 28 
## Percentage nonzero weights: 28 
## Average number of links: 2.8 
## 
## Weights style: W 
## Weights constants summary:
##    n  nn S0       S1       S2
## W 10 100 10 8.053571 48.60714
#Rook
list.queen <- poly2nb(kaltim, row.names=IDs, queen=FALSE)
W <- nb2listw(list.queen, style="W", zero.policy=TRUE); W
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10 
## Number of nonzero links: 28 
## Percentage nonzero weights: 28 
## Average number of links: 2.8 
## 
## Weights style: W 
## Weights constants summary:
##    n  nn S0       S1       S2
## W 10 100 10 8.053571 48.60714
WB <- nb2mat(list.queen, style='W', zero.policy = TRUE)
#Moran's I
IPM<-data$IPM
moran.test(IPM, WL.K, alternative = "two.sided")
## 
##  Moran I test under randomisation
## 
## data:  IPM  
## weights: WL.K    
## 
## Moran I statistic standard deviate = -1.6416, p-value = 0.1007
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        -0.4107382        -0.1111111         0.0333124
moran.test(IPM, W, alternative = "two.sided")
## 
##  Moran I test under randomisation
## 
## data:  IPM  
## weights: W    
## 
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.53543494       -0.11111111        0.05317398
moran.test(IPM, WL.Q, alternative = "two.sided")
## 
##  Moran I test under randomisation
## 
## data:  IPM  
## weights: WL.Q    
## 
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.53543494       -0.11111111        0.05317398
moran.lm<-lm.morantest(tpt.ols, W, alternative="two.sided")
print(moran.lm)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## weights: W
## 
## Moran I statistic standard deviate = 2.3675, p-value = 0.01791
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I      Expectation         Variance 
##       0.34851679      -0.09914931       0.03575383
Local_Moran <- localmoran(data$IPM, W)
   
quadr_data <- attr(Local_Moran, "quadr")
mean_values <- quadr_data$mean

kaltim_sf <- st_as_sf(kaltim)
mean_values_char <- as.character(attr(Local_Moran, "quadr")$mean)
kaltim_sf$mean_values <- mean_values_char
ggplot() +
  geom_sf(data = kaltim_sf, aes(fill = mean_values)) +  # Fill based on 'mean_values'
  scale_fill_manual(values = c("Low-Low" = "blue", "High-Low" = "green", 
                               "Low-High" = "yellow", "High-High" = "red")) + 
  labs(title = "Local Moran's I Mean Values in Kalimantan Timur", fill = "Mean Type") +
  theme_minimal()

# Lagrange multiplier test
lm.RStests(tpt.ols, W, test="all")
## 
##  Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
##  dependence
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
## 
## RSerr = 1.5082, df = 1, p-value = 0.2194
## 
## 
##  Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
##  dependence
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
## 
## RSlag = 3.3312, df = 1, p-value = 0.06798
## 
## 
##  Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
##  dependence
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
## 
## adjRSerr = 0.26668, df = 1, p-value = 0.6056
## 
## 
##  Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
##  dependence
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
## 
## adjRSlag = 2.0897, df = 1, p-value = 0.1483
## 
## 
##  Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
##  dependence
## 
## data:  
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
## 
## SARMA = 3.5979, df = 2, p-value = 0.1655
# Spatial lag model
tpt.sar <- lagsarlm(y ~ x1+x2+x3+x4, listw = W)
summary(tpt.sar)
## 
## Call:lagsarlm(formula = y ~ x1 + x2 + x3 + x4, listw = W)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.932224 -0.329768  0.028291  0.263345  0.884510 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  3.7708e+01  1.0875e+01  3.4673 0.0005256
## x1          -5.0515e-01  2.3750e-01 -2.1269 0.0334276
## x2          -3.1118e-01  6.0924e-02 -5.1077 3.261e-07
## x3           5.9370e-06  2.0856e-06  2.8467 0.0044178
## x4           4.7603e+00  4.3811e-01 10.8656 < 2.2e-16
## 
## Rho: 0.21006, LR test value: 3.0389, p-value: 0.081289
## Asymptotic standard error: 0.12075
##     z-value: 1.7395, p-value: 0.081939
## Wald statistic: 3.026, p-value: 0.081939
## 
## Log likelihood: -7.713682 for lag model
## ML residual variance (sigma squared): 0.27001, (sigma: 0.51963)
## Number of observations: 10 
## Number of parameters estimated: 7 
## AIC: 29.427, (AIC for lm: 30.466)
## LM test for residual autocorrelation
## test value: 0.06955, p-value: 0.79199
# Spatial Durbin Model
tpt.sdm <- lagsarlm(y ~ x1 + x2 + x3 + x4,  listw = W,
                    Durbin = y ~ x1 + x2 + x3 +x4, type = "mixed")
summary(tpt.sdm)
## 
## Call:lagsarlm(formula = y ~ x1 + x2 + x3 + x4, listw = W, Durbin = y ~ 
##     x1 + x2 + x3 + x4, type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.351081 -0.056520  0.020755  0.081199  0.185771 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  3.2485e+01  1.4228e+01  2.2832   0.02242
## x1           8.1723e-01  1.6589e-01  4.9264 8.376e-07
## x2           1.2022e-01  4.7587e-02  2.5264   0.01152
## x3          -2.8718e-06  1.9052e-06 -1.5074   0.13171
## x4           2.3633e+00  3.0797e-01  7.6736 1.665e-14
## lag.x1      -2.0070e+00  3.6603e-01 -5.4831 4.179e-08
## lag.x2      -2.4062e-01  1.0440e-01 -2.3047   0.02118
## lag.x3      -1.5519e-05  3.1847e-06 -4.8732 1.098e-06
## lag.x4      -6.4931e-01  1.1800e+00 -0.5503   0.58213
## 
## Rho: 0.56337, LR test value: 1.439, p-value: 0.2303
## Asymptotic standard error: 0.21763
##     z-value: 2.5887, p-value: 0.0096337
## Wald statistic: 6.7014, p-value: 0.0096337
## 
## Log likelihood: 4.209974 for mixed model
## ML residual variance (sigma squared): 0.022409, (sigma: 0.1497)
## Number of observations: 10 
## Number of parameters estimated: 11 
## AIC: 13.58, (AIC for lm: 13.019)
## LM test for residual autocorrelation
## test value: 8.365, p-value: 0.0038251
# Model selection
data.frame("MODEL" = c("OLS", "SAR", "SDM"),
           "AIC" = c(AIC(tpt.ols), AIC(tpt.sar),AIC(tpt.sdm)))
##   MODEL      AIC
## 1   OLS 30.46631
## 2   SAR 29.42736
## 3   SDM 13.58005
# Multicolinearity
X <- data.frame(x1, x2,x3,x4)
R <- cor(X) #Correlation
VIF <- diag(solve(R))
VIF 
##       x1       x2       x3       x4 
## 6.201875 1.258235 2.792846 4.369041
# Heteroskedasticity
bptest.Sarlm(tpt.sdm)
## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 6.2384, df = 8, p-value = 0.6206
# Normality
shapiro.test(residuals(tpt.sdm))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(tpt.sdm)
## W = 0.9151, p-value = 0.3179
#Non-autocorrelation
moran.test(residuals(tpt.sdm), W)
## 
##  Moran I test under randomisation
## 
## data:  residuals(tpt.sdm)  
## weights: W    
## 
## Moran I statistic standard deviate = -1.3957, p-value = 0.9186
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        -0.3997080        -0.1111111         0.0427554
# Marginal effects
impacts(tpt.sdm, listw=W)
## Impact measures (mixed, exact):
##           Direct      Indirect         Total
## x1  4.325097e-01 -3.157405e+00 -2.724895e+00
## x2  7.721762e-02 -3.529534e-01 -2.757358e-01
## x3 -7.134743e-06 -3.498629e-05 -4.212103e-05
## x4  2.532923e+00  1.392480e+00  3.925403e+00