# library
suppressPackageStartupMessages({
library(dplyr) # for "glimpse"
library(ggplot2)
library(scales) # for "comma"
library(magrittr)
library(sf)
library(ncdf4)
library(tidyverse)
library(reticulate)
library(gstat)
library(sp)
library(raster)
library(Metrics)
library(gganimate)
library(spdep)
library(spatialreg)
library(olsrr)
library(car)
library(corrplot)
library(lmtest)
library(tmap)
library(tmaptools)
})
## Warning: package 'reticulate' was built under R version 4.4.2
## Warning: package 'gstat' was built under R version 4.4.2
## Warning: package 'raster' was built under R version 4.4.2
## Warning: package 'gganimate' was built under R version 4.4.2
## Warning: package 'spdep' was built under R version 4.4.2
## Warning: package 'spatialreg' was built under R version 4.4.2
## Warning: package 'Matrix' was built under R version 4.4.2
## Warning: package 'olsrr' was built under R version 4.4.2
# Set working directory
setwd("C:/Users/suhaila/Downloads")
# Set working directory
# Load data
data <- read.csv("Data UAS - Sheet1 (1).csv", dec=",")
kaltim <- read_sf("Kalimantan_Timur_ADMIN_BPS.shp")
kaltim <- as_Spatial(kaltim)
class(kaltim)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(slot(kaltim,"data"))
## 'data.frame': 10 obs. of 6 variables:
## $ ADM0_EN : chr "Indonesia" "Indonesia" "Indonesia" "Indonesia" ...
## $ date : Date, format: "2019-12-20" "2019-12-20" ...
## $ validOn : Date, format: "2020-04-01" "2020-04-01" ...
## $ PROVINCE : chr "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" "Kalimantan Timur" ...
## $ Kabupaten: chr "Berau" "Kota Balikpapan" "Kota Bontang" "Kota Samarinda" ...
## $ PRV2 : chr "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" "Kalimantan_Timur" ...
# Hilangkan tanda titik dan ubah koma menjadi titik (jika ada)
data$PDRB <- gsub("\\.", "", data$PDRB) # Hapus tanda titik
data$PDRB <- gsub(",", ".", data$PDRB) # Ganti koma dengan titik
# Konversi ke numerik
data$PDRB <- as.numeric(data$PDRB)
# Cek hasilnya
str(data$PDRB)
## num [1:10] 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
# Coordinate
IDs <- kaltim$Kabupaten
coords<-coordinates(kaltim)
# Checking missing value
colSums(is.na(data))
## Kabupaten.Kota Tahun
## 0 0
## TPT TPAK
## 0 0
## IPM Jumlah.Penduduk.Miskin
## 0 0
## Indeks.kemahalan.konstruksi Umur.Harapan.Hidup
## 0 0
## Rata.Rata.Lama.Sekolah Laju.Pertumbuhan.Penduduk
## 0 0
## Indeks.Gini PDRB
## 0 0
## Realisasi.Modal.Investasi.PMA
## 0
str(data)
## 'data.frame': 10 obs. of 13 variables:
## $ Kabupaten.Kota : chr "Balikpapan" "Berau" "Bontang " "Kutai Barat " ...
## $ Tahun : int 2023 2023 2023 2023 2023 2023 2023 2023 2023 2023
## $ TPT : num 6.09 4.95 7.74 6.16 4.05 5.93 2.09 4.72 2.07 5.92
## $ TPAK : num 63.5 66.9 68.3 70.5 65.3 ...
## $ IPM : num 82 76.7 81.6 74 76 ...
## $ Jumlah.Penduduk.Miskin : num 14.99 13.26 7.71 14.69 60.86 ...
## $ Indeks.kemahalan.konstruksi : num 111 113 114 121 110 ...
## $ Umur.Harapan.Hidup : num 74.9 72.4 74.7 73.2 72.8 ...
## $ Rata.Rata.Lama.Sekolah : num 10.93 9.56 10.92 8.85 9.26 ...
## $ Laju.Pertumbuhan.Penduduk : num 1.14 1.48 1.39 1.01 1.35 1.73 1.39 1.13 3.73 0.99
## $ Indeks.Gini : num 0.323 0.327 0.299 0.277 0.284 0.336 0.33 0.292 0.299 0.323
## $ PDRB : num 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
## $ Realisasi.Modal.Investasi.PMA: num 77904 105780 331154 458236 46997 ...
#Define datasets
kaltim@data <- data[,c(-1,-1)]
y <- kaltim$IPM
x1 <- kaltim$TPT
x2 <- kaltim$Laju.Pertumbuhan.Penduduk
x3 <- kaltim$Realisasi.Modal.Investasi.PMA
x4 <- kaltim$Rata.Rata.Lama.Sekolah
# Linear model OLS
tpt.ols <- lm(y ~ x1 + x2 + x3+x4)
summary(tpt.ols)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -0.04317 0.20517 -1.10626 0.25321 0.95799 -0.65032 -1.68771 0.88144
## 9 10
## 0.37643 0.81322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.341e+01 6.440e+00 5.188 0.00350 **
## x1 -5.167e-02 5.488e-01 -0.094 0.92866
## x2 8.842e-01 6.009e-01 1.471 0.20116
## x3 2.255e-06 4.081e-06 0.553 0.60435
## x4 4.373e+00 8.381e-01 5.218 0.00342 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.192 on 5 degrees of freedom
## Multiple R-squared: 0.9534, Adjusted R-squared: 0.9161
## F-statistic: 25.57 on 4 and 5 DF, p-value: 0.001587
library(car)
# Multicolinearity
vif(tpt.ols)
## x1 x2 x3 x4
## 6.316557 1.481870 2.611942 4.368378
library(lmtest)
# Heteroskedasticity
bptest(tpt.ols)
##
## studentized Breusch-Pagan test
##
## data: tpt.ols
## BP = 3.6986, df = 4, p-value = 0.4483
# Normality
e <- residuals(tpt.ols)
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.9078, p-value = 0.2662
# Pastikan paket yang dibutuhkan sudah terpasang
library(spdep)
library(sp)
str(coords) # Periksa struktur data
## num [1:10, 1:2] 117 117 117 117 116 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:10] "1" "2" "3" "4" ...
## ..$ : NULL
# 1. Matriks Bobot KNN (contoh 3 tetangga terdekat)
k_neighbors1 <- 3
knn_weights1 <- knearneigh(coords, k = k_neighbors1)
knn_nb1 <- knn2nb(knn_weights1)
knn_listw1 <- nb2listw(knn_nb1, style = "W") # Menggunakan bobot standar
k_neighbors2 <- 4
knn_weights2 <- knearneigh(coords, k = k_neighbors2)
## Warning in knearneigh(coords, k = k_neighbors2): k greater than one-third of
## the number of data points
knn_nb2 <- knn2nb(knn_weights2)
knn_listw2 <- nb2listw(knn_nb2, style = "W") # Menggunakan bobot standar
# 2. Matriks Bobot Queen
queen_nb <- poly2nb(kaltim)
queen_listw <- nb2listw(queen_nb, style = "W") # Menggunakan bobot standar
# 3. Matriks Bobot Rook
rook_nb <- poly2nb(kaltim, queen = FALSE)
rook_listw <- nb2listw(rook_nb, style = "W") # Menggunakan bobot standar
# Cek apakah variabel_target ada dan benar
str(data) # Periksa data untuk memastikan ada variabel IPM
## 'data.frame': 10 obs. of 13 variables:
## $ Kabupaten.Kota : chr "Balikpapan" "Berau" "Bontang " "Kutai Barat " ...
## $ Tahun : int 2023 2023 2023 2023 2023 2023 2023 2023 2023 2023
## $ TPT : num 6.09 4.95 7.74 6.16 4.05 5.93 2.09 4.72 2.07 5.92
## $ TPAK : num 63.5 66.9 68.3 70.5 65.3 ...
## $ IPM : num 82 76.7 81.6 74 76 ...
## $ Jumlah.Penduduk.Miskin : num 14.99 13.26 7.71 14.69 60.86 ...
## $ Indeks.kemahalan.konstruksi : num 111 113 114 121 110 ...
## $ Umur.Harapan.Hidup : num 74.9 72.4 74.7 73.2 72.8 ...
## $ Rata.Rata.Lama.Sekolah : num 10.93 9.56 10.92 8.85 9.26 ...
## $ Laju.Pertumbuhan.Penduduk : num 1.14 1.48 1.39 1.01 1.35 1.73 1.39 1.13 3.73 0.99
## $ Indeks.Gini : num 0.323 0.327 0.299 0.277 0.284 0.336 0.33 0.292 0.299 0.323
## $ PDRB : num 3.75e+07 2.38e+07 1.35e+08 1.05e+08 3.16e+07 ...
## $ Realisasi.Modal.Investasi.PMA: num 77904 105780 331154 458236 46997 ...
variabel_target <- data$IPM
# 4. Hitung nilai Moran's I untuk KNN
knn_moran1 <- moran.test(variabel_target, knn_listw1)
print(knn_moran1)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: knn_listw1
##
## Moran I statistic standard deviate = -1.6416, p-value = 0.9497
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.4107382 -0.1111111 0.0333124
knn_moran2 <- moran.test(variabel_target, knn_listw2)
print(knn_moran2)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: knn_listw2
##
## Moran I statistic standard deviate = -1.2909, p-value = 0.9016
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.29495011 -0.11111111 0.02028143
# 5. Hitung nilai Moran's I untuk Queen
queen_moran <- moran.test(variabel_target, queen_listw)
print(queen_moran)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: queen_listw
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
# 6. Hitung nilai Moran's I untuk Rook
rook_moran <- moran.test(variabel_target, rook_listw)
print(rook_moran)
##
## Moran I test under randomisation
##
## data: variabel_target
## weights: rook_listw
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.9671
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
# 7. Bandingkan hasil Moran's I
moran_results <- data.frame(
Method = c("KNN 3", "KNN 4", "Queen", "Rook"),
Moran_I = c(knn_moran1$estimate[1], knn_moran2$estimate[1], queen_moran$estimate[1], rook_moran$estimate[1]),
Expectation = c(knn_moran1$estimate[2], knn_moran2$estimate[2], queen_moran$estimate[2], rook_moran$estimate[2]),
Variance = c(knn_moran1$estimate[3], knn_moran2$estimate[3], queen_moran$estimate[3], rook_moran$estimate[3])
)
print(moran_results)
## Method Moran_I Expectation Variance
## 1 KNN 3 -0.4107382 -0.1111111 0.03331240
## 2 KNN 4 -0.2949501 -0.1111111 0.02028143
## 3 Queen -0.5354349 -0.1111111 0.05317398
## 4 Rook -0.5354349 -0.1111111 0.05317398
# Spatial weight matrix
library(spdep)
#KNN
W.K <- knn2nb(knearneigh(coords, k=3 ,longlat=TRUE))
WL.K <- nb2listw(W.K,style='W') ; WL.K
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 30
## Percentage nonzero weights: 30
## Average number of links: 3
## Non-symmetric neighbours list
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 5.555556 42.44444
#KNN
W.K1 <- knn2nb(knearneigh(coords, k=4 ,longlat=TRUE))
## Warning in knearneigh(coords, k = 4, longlat = TRUE): k greater than one-third
## of the number of data points
WL.K1 <- nb2listw(W.K,style='W') ; WL.K1
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 30
## Percentage nonzero weights: 30
## Average number of links: 3
## Non-symmetric neighbours list
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 5.555556 42.44444
#Queen
W.Q <- poly2nb(kaltim, row.names=IDs, queen=TRUE)
WL.Q <- nb2listw(W.Q,style='W', zero.policy = TRUE) ; WL.Q
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 28
## Percentage nonzero weights: 28
## Average number of links: 2.8
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 8.053571 48.60714
#Rook
list.queen <- poly2nb(kaltim, row.names=IDs, queen=FALSE)
W <- nb2listw(list.queen, style="W", zero.policy=TRUE); W
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 10
## Number of nonzero links: 28
## Percentage nonzero weights: 28
## Average number of links: 2.8
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 10 100 10 8.053571 48.60714
WB <- nb2mat(list.queen, style='W', zero.policy = TRUE)
#Moran's I
IPM<-data$IPM
moran.test(IPM, WL.K, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: WL.K
##
## Moran I statistic standard deviate = -1.6416, p-value = 0.1007
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.4107382 -0.1111111 0.0333124
moran.test(IPM, WL.K1, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: WL.K1
##
## Moran I statistic standard deviate = -1.6416, p-value = 0.1007
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.4107382 -0.1111111 0.0333124
moran.test(IPM, W, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: W
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
moran.test(IPM, WL.Q, alternative = "two.sided")
##
## Moran I test under randomisation
##
## data: IPM
## weights: WL.Q
##
## Moran I statistic standard deviate = -1.8401, p-value = 0.06575
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## -0.53543494 -0.11111111 0.05317398
moran.lm<-lm.morantest(tpt.ols, W, alternative="two.sided")
print(moran.lm)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## weights: W
##
## Moran I statistic standard deviate = 2.323, p-value = 0.02018
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 0.44941041 -0.02167962 0.04112482
moran.plot(y, W, labels=IDs, xlim=c(0, 100), ylim=c(0,100))

Local_Moran <- localmoran(data$IPM, W)
quadr_data <- attr(Local_Moran, "quadr")
mean_values <- quadr_data$mean
kaltim_sf <- st_as_sf(kaltim)
mean_values_char <- as.character(attr(Local_Moran, "quadr")$mean)
kaltim_sf$mean_values <- mean_values_char
ggplot() +
geom_sf(data = kaltim_sf, aes(fill = mean_values)) + # Fill based on 'mean_values'
scale_fill_manual(values = c("Low-Low" = "blue", "High-Low" = "green",
"Low-High" = "yellow", "High-High" = "red")) +
labs(title = "Local Moran's I Mean Values in Kalimantan Timur", fill = "Mean Type") +
theme_minimal()

# Lagrange multiplier test
lm.RStests(tpt.ols, W, test="all")
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## RSerr = 2.5078, df = 1, p-value = 0.1133
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## RSlag = 0.043798, df = 1, p-value = 0.8342
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## adjRSerr = 3.7011, df = 1, p-value = 0.05438
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## adjRSlag = 1.2371, df = 1, p-value = 0.266
##
##
## Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
## dependence
##
## data:
## model: lm(formula = y ~ x1 + x2 + x3 + x4)
## test weights: W
##
## SARMA = 3.7449, df = 2, p-value = 0.1537
# Spatial error model
tpt.sem <- errorsarlm(y ~ x1 + x2 + x3+x4, listw = W)
summary(tpt.sem)
##
## Call:errorsarlm(formula = y ~ x1 + x2 + x3 + x4, listw = W)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09184 -0.27903 0.13272 0.44116 0.62171
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.1121e+01 3.6930e+00 8.4270 < 2.2e-16
## x1 -1.1086e-01 2.7101e-01 -0.4090 0.682507
## x2 7.3220e-01 2.3134e-01 3.1650 0.001551
## x3 3.6930e-06 1.3898e-06 2.6573 0.007877
## x4 4.6831e+00 4.6115e-01 10.1554 < 2.2e-16
##
## Lambda: 0.75902, LR test value: 5.3862, p-value: 0.020296
## Asymptotic standard error: 0.14829
## z-value: 5.1184, p-value: 3.0816e-07
## Wald statistic: 26.198, p-value: 3.0816e-07
##
## Log likelihood: -9.784431 for error model
## ML residual variance (sigma squared): 0.3209, (sigma: 0.56648)
## Number of observations: 10
## Number of parameters estimated: 7
## AIC: 33.569, (AIC for lm: 36.955)
# Model selection
data.frame("MODEL" = c("OLS", "SEM"),
"AIC" = c(AIC(tpt.ols),AIC(tpt.sem)))
## MODEL AIC
## 1 OLS 36.95508
## 2 SEM 33.56886
# Multicolinearity
X <- data.frame(x1, x2,x3,x4)
R <- cor(X) #Correlation
VIF <- diag(solve(R))
VIF
## x1 x2 x3 x4
## 6.316557 1.481870 2.611942 4.368378
# Heteroskedasticity
bptest.Sarlm(tpt.sem)
##
## studentized Breusch-Pagan test
##
## data:
## BP = 1.0954, df = 4, p-value = 0.895
# Normality
shapiro.test(residuals(tpt.sem))
##
## Shapiro-Wilk normality test
##
## data: residuals(tpt.sem)
## W = 0.88272, p-value = 0.1402
#Non-autocorrelation
moran.test(residuals(tpt.sem), W,alternative="two.sided")
##
## Moran I test under randomisation
##
## data: residuals(tpt.sem)
## weights: W
##
## Moran I statistic standard deviate = 1.357, p-value = 0.1748
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic Expectation Variance
## 0.19684534 -0.11111111 0.05150153