# library
suppressPackageStartupMessages({
library(dplyr) # for "glimpse"
library(ggplot2)
library(scales) # for "comma"
library(magrittr)
library(sf)
library(ncdf4)
library(tidyverse)
library(reticulate)
library(gstat)
library(sp)
library(raster)
library(Metrics)
library(gganimate)
library(spdep)
library(spatialreg)
library(olsrr)
library(car)
library(corrplot)
library(lmtest)
library(tmap)
library(tmaptools)
})
## Warning: package 'reticulate' was built under R version 4.4.2
## Warning: package 'gstat' was built under R version 4.4.2
## Warning: package 'raster' was built under R version 4.4.2
## Warning: package 'gganimate' was built under R version 4.4.2
## Warning: package 'spdep' was built under R version 4.4.2
## Warning: package 'spatialreg' was built under R version 4.4.2
## Warning: package 'Matrix' was built under R version 4.4.2
## Warning: package 'olsrr' was built under R version 4.4.2
# Set working directory
setwd("C:/Users/suhaila/Downloads")
# Set working directory
# Load data
data <- read.csv("Data UAS - Sheet1 (1).csv", dec=",")
kaltim <- read_sf("Kalimantan_Timur_ADMIN_BPS.shp")
kaltim <- as_Spatial(kaltim)
class(kaltim)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(slot(kaltim,"data"))
## 'data.frame': 10 obs. of 6 variables:
## $ ADM0_EN : chr "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
## $ date : Date, format: "2019-12-20" "2019-12-20" ...
## $ validOn : Date, format: "2020-04-01" "2020-04-01" ...
## $ PROVINCE : chr "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" ...
## $ Kabupaten: chr "Berau" "Kota Balikpapan" "Kota Bontang" "Kota Samarinda" ...
## $ PRV2 : chr "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" ...
# Hilangkan tanda titik dan ubah koma menjadi titik (jika ada)
data$PDRB <- gsub("\\.", "", data$PDRB) # Hapus tanda titik
data$PDRB <- gsub(",", ".", data$PDRB) # Ganti koma dengan titik
# Konversi ke numerik
data$PDRB <- as.numeric(data$PDRB)
# Cek hasilnya
str(data$PDRB)
## num [1:10] 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
# Coordinate
IDs <- kaltim$Kabupaten
coords<-coordinates(kaltim)
# Checking missing value
colSums(is.na(data))
## Kabupaten.Kota Tahun
## 0 0
## TPT TPAK
## 0 0
## IPM Jumlah.Penduduk.Miskin
## 0 0
## Indeks.kemahalan.konstruksi Umur.Harapan.Hidup
## 0 0
## Rata.Rata.Lama.Sekolah Laju.Pertumbuhan.Penduduk
## 0 0
## Indeks.Gini PDRB
## 0 0
## Realisasi.Modal.Investasi.PMA
## 0
str(data)
## 'data.frame': 10 obs. of 13 variables:
## $ Kabupaten.Kota : chr "Balikpapan" "Berau" "Bontang " "Kutai Barat " ...
## $ Tahun : int 2023 2023 2023 2023 2023 2023 2023 2023 2023 2023
## $ TPT : num 6.09 4.95 7.74 6.16 4.05 5.93 2.09 4.72 2.07 5.92
## $ TPAK : num 63.5 66.9 68.3 70.5 65.3 ...
## $ IPM : num 82 76.7 81.6 74 76 ...
## $ Jumlah.Penduduk.Miskin : num 14.99 13.26 7.71 14.69 60.86 ...
## $ Indeks.kemahalan.konstruksi : num 111 113 114 121 110 ...
## $ Umur.Harapan.Hidup : num 74.9 72.4 74.7 73.2 72.8 ...
## $ Rata.Rata.Lama.Sekolah : num 10.93 9.56 10.92 8.85 9.26 ...
## $ Laju.Pertumbuhan.Penduduk : num 1.14 1.48 1.39 1.01 1.35 1.73 1.39 1.13 3.73 0.99
## $ Indeks.Gini : num 0.323 0.327 0.299 0.277 0.284 0.336 0.33 0.292 0.299 0.323
## $ PDRB : num 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
## $ Realisasi.Modal.Investasi.PMA: num 77904 105780 331154 458236 46997 ...
#Define datasets
kaltim@data <- data[,c(-1,-1)]
y <- kaltim$IPM
x1 <- kaltim$TPT
x2 <- kaltim$TPAK
x3 <- kaltim$Realisasi.Modal.Investasi.PMA
x4 <- kaltim$Rata.Rata.Lama.Sekolah
# Linear model OLS
tpt.ols <- lm(y ~ x1 + x2 + x3 + x4)
summary(tpt.ols)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -0.670877 0.164085 0.005478 0.544006 0.083479 -0.047967 -0.782393 -0.946132
## 9 10
## 0.998093 0.652228
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.521e+01 7.901e+00 6.988 0.000924 ***
## x1 -4.859e-01 3.932e-01 -1.236 0.271352
## x2 -2.681e-01 9.083e-02 -2.951 0.031843 *
## x3 4.241e-06 3.050e-06 1.390 0.223162
## x4 4.312e+00 6.059e-01 7.117 0.000849 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8615 on 5 degrees of freedom
## Multiple R-squared: 0.9756, Adjusted R-squared: 0.9562
## F-statistic: 50.07 on 4 and 5 DF, p-value: 0.0003185
library(car)
# Multicolinearity
vif(tpt.ols)
## x1 x2 x3 x4
## 6.201875 1.258235 2.792846 4.369041
library(lmtest)
# Heteroskedasticity
bptest(tpt.ols)
##
## studentized Breusch-Pagan test
##
## data: tpt.ols
## BP = 6.071, df = 4, p-value = 0.1939
# Normality
e <- residuals(tpt.ols)
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.94929, p-value = 0.6601
#Non-autocorrelation
durbinWatsonTest(tpt.ols)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1130757 1.537943 0.372
## Alternative hypothesis: rho != 0
#QUEEN
# 1. Matriks Bobot KNN (contoh 3 tetangga terdekat)
k_neighbors1 <- 3
knn_weights1 <- knearneigh(coords, k = k_neighbors1)
knn_nb1 <- knn2nb(knn_weights1)
knn_listw1 <- nb2listw(knn_nb1, style = "W") # Menggunakan bobot standar
k_neighbors2 <- 4
knn_weights2 <- knearneigh(coords, k = k_neighbors2)
## Warning in knearneigh(coords, k = k_neighbors2): k greater than one-third of
## the number of data points
knn_nb2 <- knn2nb(knn_weights2)
knn_listw2 <- nb2listw(knn_nb2, style = "W") # Menggunakan bobot standar
# 2. Matriks Bobot Queen
queen_nb <- poly2nb(kaltim)
queen_listw <- nb2listw(queen_nb, style = "W") # Menggunakan bobot standar
# 3. Matriks Bobot Rook
rook_nb <- poly2nb(kaltim, queen = FALSE)
rook_listw <- nb2listw(rook_nb, style = "W") # Menggunakan bobot standar
# Load library
library(spdep)
# Misalkan 'variabel_target' adalah nama kolom dari data yang ingin diuji
variabel_target <- data$IPM
# Hitung nilai Moran's I untuk KNN
knn_moran1 <- moran.test(variabel_target, knn_listw1)
print(knn_moran1)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: knn_listw1
##
## Moran I statistic standard deviate = -1.6416, p-value = 0.9497
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.4107382 -0.1111111 0.0333124
knn_moran2 <- moran.test(variabel_target, knn_listw2)
print(knn_moran2)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: knn_listw2
##
## Moran I statistic standard deviate = -1.2909, p-value = 0.9016
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.29495011 -0.11111111 0.02028143
# Hitung nilai Moran's I untuk Queen
queen_moran <- moran.test(variabel_target, queen_listw)
print(queen_moran)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: queen_listw
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
# Hitung nilai Moran's I untuk Rook
rook_moran <- moran.test(variabel_target, rook_listw)
print(rook_moran)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: rook_listw
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
# Bandingkan hasil Moran's I
moran_results <- data.frame(
Method = c("KNN 3","KNN 4", "Queen", "Rook"),
Moran_I = c(knn_moran1$estimate[1],knn_moran2$estimate[1], queen_moran$estimate[1], rook_moran$estimate[1]),
Expectation = c(knn_moran1$estimate[2],knn_moran2$estimate[2], queen_moran$estimate[2], rook_moran$estimate[2]),
Variance = c(knn_moran1$estimate[3],knn_moran2$estimate[3], queen_moran$estimate[3], rook_moran$estimate[3]),
P_Value = c(knn_moran1$p.value, knn_moran2$p.value, queen_moran$p.value, rook_moran$p.value)
)
print(moran_results)
## Method Moran_I Expectation Variance P_Value
## 1 KNN 3 -0.4107382 -0.1111111 0.03331240 0.9496678
## 2 KNN 4 -0.2949501 -0.1111111 0.02028143 0.9016286
## 3 Queen -0.5354349 -0.1111111 0.05317398 0.9671252
## 4 Rook -0.5354349 -0.1111111 0.05317398 0.9671252
# Spatial weight matrix
library(spdep)
#KNN
W.K <- knn2nb(knearneigh(coords, k=3 ,longlat=TRUE))
WL.K <- nb2listw(W.K,style='W') ; WL.K
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 30
## Percentage nonzero weights: 30
## Average number of links: 3
## Non-symmetric neighbours list
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 5.555556 42.44444
#Queen
W.Q <- poly2nb(kaltim, row.names=IDs, queen=TRUE)
WL.Q <- nb2listw(W.Q,style='W', zero.policy = TRUE) ; WL.Q
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 28
## Percentage nonzero weights: 28
## Average number of links: 2.8
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 8.053571 48.60714
#Rook
list.queen <- poly2nb(kaltim, row.names=IDs, queen=FALSE)
W <- nb2listw(list.queen, style="W", zero.policy=TRUE); W
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 28
## Percentage nonzero weights: 28
## Average number of links: 2.8
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 8.053571 48.60714
WB <- nb2mat(list.queen, style='W', zero.policy = TRUE)
#Moran's I
IPM<-data$IPM
moran.test(IPM, WL.K, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: WL.K
##
## Moran I statistic standard deviate = -1.6416, p-value = 0.1007
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.4107382 -0.1111111 0.0333124
moran.test(IPM, W, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: W
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
moran.test(IPM, WL.Q, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: WL.Q
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
moran.lm<-lm.morantest(tpt.ols, W, alternative="two.sided")
print(moran.lm)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## weights: W
##
## Moran I statistic standard deviate = 2.3675, p-value = 0.01791
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 0.34851679 -0.09914931 0.03575383
Local_Moran <- localmoran(data$IPM, W)
quadr_data <- attr(Local_Moran, "quadr")
mean_values <- quadr_data$mean
kaltim_sf <- st_as_sf(kaltim)
mean_values_char <- as.character(attr(Local_Moran, "quadr")$mean)
kaltim_sf$mean_values <- mean_values_char
ggplot() +
geom_sf(data = kaltim_sf, aes(fill = mean_values)) + # Fill based on 'mean_values'
scale_fill_manual(values = c("Low-Low" = "blue", "High-Low" = "green",
"Low-High" = "yellow", "High-High" = "red")) +
labs(title = "Local Moran's I Mean Values in Kalimantan Timur", fill = "Mean Type") +
theme_minimal()

# Lagrange multiplier test
lm.RStests(tpt.ols, W, test="all")
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## RSerr = 1.5082, df = 1, p-value = 0.2194
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## RSlag = 3.3312, df = 1, p-value = 0.06798
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## adjRSerr = 0.26668, df = 1, p-value = 0.6056
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## adjRSlag = 2.0897, df = 1, p-value = 0.1483
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## SARMA = 3.5979, df = 2, p-value = 0.1655
# Spatial lag model
tpt.sar <- lagsarlm(y ~ x1+x2+x3+x4, listw = W)
summary(tpt.sar)
##
## Call:lagsarlm(formula = y ~ x1 + x2 + x3 + x4, listw = W)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.932224 -0.329768 0.028291 0.263345 0.884510
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.7708e+01 1.0875e+01 3.4673 0.0005256
## x1 -5.0515e-01 2.3750e-01 -2.1269 0.0334276
## x2 -3.1118e-01 6.0924e-02 -5.1077 3.261e-07
## x3 5.9370e-06 2.0856e-06 2.8467 0.0044178
## x4 4.7603e+00 4.3811e-01 10.8656 < 2.2e-16
##
## Rho: 0.21006, LR test value: 3.0389, p-value: 0.081289
## Asymptotic standard error: 0.12075
## z-value: 1.7395, p-value: 0.081939
## Wald statistic: 3.026, p-value: 0.081939
##
## Log likelihood: -7.713682 for lag model
## ML residual variance (sigma squared): 0.27001, (sigma: 0.51963)
## Number of observations: 10
## Number of parameters estimated: 7
## AIC: 29.427, (AIC for lm: 30.466)
## LM test for residual autocorrelation
## test value: 0.06955, p-value: 0.79199
# Spatial Durbin Model
tpt.sdm <- lagsarlm(y ~ x1 + x2 + x3 + x4, listw = W,
Durbin = y ~ x1 + x2 + x3 +x4, type = "mixed")
summary(tpt.sdm)
##
## Call:lagsarlm(formula = y ~ x1 + x2 + x3 + x4, listw = W, Durbin = y ~
## x1 + x2 + x3 + x4, type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.351081 -0.056520 0.020755 0.081199 0.185771
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.2485e+01 1.4228e+01 2.2832 0.02242
## x1 8.1723e-01 1.6589e-01 4.9264 8.376e-07
## x2 1.2022e-01 4.7587e-02 2.5264 0.01152
## x3 -2.8718e-06 1.9052e-06 -1.5074 0.13171
## x4 2.3633e+00 3.0797e-01 7.6736 1.665e-14
## lag.x1 -2.0070e+00 3.6603e-01 -5.4831 4.179e-08
## lag.x2 -2.4062e-01 1.0440e-01 -2.3047 0.02118
## lag.x3 -1.5519e-05 3.1847e-06 -4.8732 1.098e-06
## lag.x4 -6.4931e-01 1.1800e+00 -0.5503 0.58213
##
## Rho: 0.56337, LR test value: 1.439, p-value: 0.2303
## Asymptotic standard error: 0.21763
## z-value: 2.5887, p-value: 0.0096337
## Wald statistic: 6.7014, p-value: 0.0096337
##
## Log likelihood: 4.209974 for mixed model
## ML residual variance (sigma squared): 0.022409, (sigma: 0.1497)
## Number of observations: 10
## Number of parameters estimated: 11
## AIC: 13.58, (AIC for lm: 13.019)
## LM test for residual autocorrelation
## test value: 8.365, p-value: 0.0038251
# Model selection
data.frame("MODEL" = c("OLS", "SAR", "SDM"),
"AIC" = c(AIC(tpt.ols), AIC(tpt.sar),AIC(tpt.sdm)))
## MODEL AIC
## 1 OLS 30.46631
## 2 SAR 29.42736
## 3 SDM 13.58005
# Multicolinearity
X <- data.frame(x1, x2,x3,x4)
R <- cor(X) #Correlation
VIF <- diag(solve(R))
VIF
## x1 x2 x3 x4
## 6.201875 1.258235 2.792846 4.369041
# Heteroskedasticity
bptest.Sarlm(tpt.sdm)
##
## studentized Breusch-Pagan test
##
## data:
## BP = 6.2384, df = 8, p-value = 0.6206
# Normality
shapiro.test(residuals(tpt.sdm))
##
## Shapiro-Wilk normality test
##
## data: residuals(tpt.sdm)
## W = 0.9151, p-value = 0.3179
#Non-autocorrelation
moran.test(residuals(tpt.sdm), W)
##
## Moran I test under randomisation
##
## data: residuals(tpt.sdm)
## weights: W
##
## Moran I statistic standard deviate = -1.3957, p-value = 0.9186
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.3997080 -0.1111111 0.0427554
# Marginal effects
impacts(tpt.sdm, listw=W)
## Impact measures (mixed, exact):
## Direct Indirect Total
## x1 4.325097e-01 -3.157405e+00 -2.724895e+00
## x2 7.721762e-02 -3.529534e-01 -2.757358e-01
## x3 -7.134743e-06 -3.498629e-05 -4.212103e-05
## x4 2.532923e+00 1.392480e+00 3.925403e+00