Spatial Regression Model

Author

Rosita Ria Rusesta (M0501251016)

1. Model Prediksi GRP Data China

library(sf)
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(readxl)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(spdep)
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'spatialreg'

The following objects are masked from 'package:spdep':

    get.ClusterOption, get.coresOption, get.mcOption,
    get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
    set.coresOption, set.mcOption, set.VerboseOption,
    set.ZeroPolicyOption
library(MuMIn)
Registered S3 method overwritten by 'MuMIn':
  method     from      
  nobs.Sarlm spatialreg
shp_china <- st_read("China29/China29.shp")
Reading layer `China29' from data source 
  `/Users/rositariarusesta/Desktop/Tugas Spasial Pertemuan 11/China29/China29.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 29 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 73.5577 ymin: 20.22264 xmax: 134.7739 ymax: 53.56086
Geodetic CRS:  WGS 84
data_china <- read_excel("Data_China.xlsx")

china_join <- shp_china %>%
  left_join(data_china,
            by = c("NAME_1" = "Region"))
head(china_join)
Simple feature collection with 6 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 92.76416 ymin: 20.22264 xmax: 120.8376 ymax: 42.77393
Geodetic CRS:  WGS 84
  ID_0 ISO NAME_0 ID_1    NAME_1 HASC_1 CCN_1 CCA_1    TYPE_1    ENGTYPE_1
1   49 CHN  China    1     Anhui  CN.AH     0  <NA>     Sh?ng     Province
2   49 CHN  China    2   Beijing  CN.BJ     0  <NA> Zhíxiáshì Municipality
3   49 CHN  China    3 Chongqing  CN.CQ     0  <NA> Zhíxiáshì Municipality
4   49 CHN  China    4    Fujian  CN.FJ     0  <NA>     Sh?ng     Province
5   49 CHN  China    5     Gansu  CN.GS     0  <NA>     Sh?ng     Province
6   49 CHN  China    6 Guangdong  CN.GD     0  <NA>     Sh?ng     Province
  NL_NAME_1 VARNAME_1     Long      Lat   GRP Level_of_Urbanization
1     ??|??     ?nhu? 117.2262 31.82579 28792                 46.50
2     ??|??   B?ij?ng 116.4107 40.18491 87475                 86.20
3     ??|?? Chóngqìng 107.8748 30.05865 38914                 56.98
4        ??    Fújiàn 117.9815 26.08641 52763                 59.60
5     ??|??     G?nsù 100.9339 37.82079 21978                 38.75
6     ??|?? Gu?ngd?ng 113.4164 23.34225 54095                 67.40
                        geometry
1 MULTIPOLYGON (((116.4263 34...
2 MULTIPOLYGON (((116.6669 40...
3 MULTIPOLYGON (((108.5419 32...
4 MULTIPOLYGON (((117.689 23....
5 MULTIPOLYGON (((97.18472 42...
6 MULTIPOLYGON (((110.1182 20...
GRP <- ggplot(data = china_join) +
  geom_sf(
    aes(fill = GRP),
    color = "white",
    linewidth = 0.2
  ) +
  
  scale_fill_viridis_c(
    option = "magma",
    name = "GRP"
  ) +
  
  labs(
    title = "Gross Regional Product di China",
    subtitle = "Data kontinu per provinsi",
    x = "Longitude",
    y = "Latitude"
  ) +
  
  theme_minimal() +
  
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11)
  )

GRP

Urbanization <- ggplot(data = china_join) +
  geom_sf(
    aes(fill = `Level_of_Urbanization`),
    color = "white",
    linewidth = 0.2
  ) +
  
  scale_fill_viridis_c(
    option = "magma",
    name = "Level of Urbanization"
  ) +
  
  labs(
    title = "Level of Urbanization di China",
    subtitle = "Data kontinu per provinsi",
    x = "Longitude",
    y = "Latitude"
  ) +
  
  theme_minimal() +
  
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11)
  )
Urbanization   

Ols Regression (Data China)

china.lm<- lm(GRP ~ Level_of_Urbanization,data=china_join)
summary(china.lm)

Call:
lm(formula = GRP ~ Level_of_Urbanization, data = china_join)

Residuals:
     Min       1Q   Median       3Q      Max 
-12050.9  -4410.4   -374.8   2961.6  14948.1 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           -31895.5     5197.6  -6.137 1.48e-06 ***
Level_of_Urbanization   1400.0       92.6  15.118 1.06e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6595 on 27 degrees of freedom
Multiple R-squared:  0.8944,    Adjusted R-squared:  0.8904 
F-statistic: 228.6 on 1 and 27 DF,  p-value: 1.064e-14

Matriks Pembobot (Railway Distance)

# Baca jarak kereta dari sheet Railway Distance
rail <- read_excel("Data_China.xlsx", sheet = "Railway Distance", skip = 1)
New names:
• `` -> `...1`
rail_matrix <- as.matrix(rail[, 3:31])
rownames(rail_matrix) <- colnames(rail_matrix)

# Inverse distance (w_ij = 1/d_ij, diagonal = 0)
W_rail <- 1 / rail_matrix
diag(W_rail) <- 0

# Row-standardize
W_rail_std <- W_rail / rowSums(W_rail)

# Buat listw object
W_listw <- mat2listw(W_rail_std, style = "W")

Uji Morans’I (Railway Distance)

col.moran <- lm.morantest(china.lm, W_listw)
col.moran

    Global Moran I for regression residuals

data:  
model: lm(formula = GRP ~ Level_of_Urbanization, data = china_join)
weights: W_listw

Moran I statistic standard deviate = 1.1234, p-value = 0.1306
alternative hypothesis: greater
sample estimates:
Observed Moran I      Expectation         Variance 
     0.010629595     -0.033552435      0.001546711 

Matriks Pembobot Queen Contiguity

coords <- st_coordinates(st_centroid(china_join))
Warning: st_centroid assumes attributes are constant over geometries
knn <- knearneigh(coords, k = 3)
nb_knn <- knn2nb(knn)
lw <- nb2listw(nb_knn, style = "W")

Uji Morans’I (Queen Contiguity)

col.moran <- lm.morantest(china.lm, lw)
col.moran

    Global Moran I for regression residuals

data:  
model: lm(formula = GRP ~ Level_of_Urbanization, data = china_join)
weights: lw

Moran I statistic standard deviate = 1.1146, p-value = 0.1325
alternative hypothesis: greater
sample estimates:
Observed Moran I      Expectation         Variance 
      0.08855909      -0.04904934       0.01524106 

Uji Lagrange Multiplier

china.lagrange <- lm.LMtests(china.lm, lw, test=c("LMerr","RLMerr","LMlag","RLMlag","SARMA"))
Please update scripts to use lm.RStests in place of lm.LMtests
summary(china.lagrange)
    Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
    dependence
data:  
model: lm(formula = GRP ~ Level_of_Urbanization, data = china_join)
test weights: listw
 
         statistic parameter p.value  
RSerr      0.41512         1 0.51938  
adjRSerr   0.02696         1 0.86958  
RSlag      5.19965         1 0.02259 *
adjRSlag   4.81149         1 0.02827 *
SARMA      5.22661         2 0.07329 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Spatial Lag Model

china.lag <- lagsarlm(GRP ~ Level_of_Urbanization,data=china_join,lw)
Warning in lagsarlm(GRP ~ Level_of_Urbanization, data = china_join, lw): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
  reciprocal condition number = 6.37534e-18 - using numerical Hessian.
summary(china.lag)

Call:lagsarlm(formula = GRP ~ Level_of_Urbanization, data = china_join, 
    listw = lw)

Residuals:
      Min        1Q    Median        3Q       Max 
-14910.34  -4430.21    220.29   2658.44  11459.87 

Type: lag 
Coefficients: (numerical Hessian approximate standard errors) 
                        Estimate Std. Error z value  Pr(>|z|)
(Intercept)           -34188.681   4718.001 -7.2464 4.279e-13
Level_of_Urbanization   1300.787     92.981 13.9899 < 2.2e-16

Rho: 0.1676, LR test value: 4.7197, p-value: 0.029818
Approximate (numerical Hessian) standard error: 0.073737
    z-value: 2.2729, p-value: 0.023033
Wald statistic: 5.166, p-value: 0.023033

Log likelihood: -292.7817 for lag model
ML residual variance (sigma squared): 34198000, (sigma: 5847.9)
Number of observations: 29 
Number of parameters estimated: 4 
AIC: 593.56, (AIC for lm: 596.28)
AICc(china.lm)
[1] 597.2431
AICc(china.lag)
[1] 595.23
uji_lr <- anova(china.lag, china.lm)
uji_lr
          Model df    AIC  logLik Test L.Ratio  p-value
china.lag     1  4 593.56 -292.78    1                 
china.lm      2  3 596.28 -295.14    2  4.7197 0.029818

2. Model Dependensi Spasial (Jawa Barat)

Data Kemiskinan Jawa Barat

shp_jabar <- st_read("/Users/rositariarusesta/Desktop/Tugas Spasial Pertemuan 12/Jabar2/Jabar2.shp")
Reading layer `Jabar2' from data source 
  `/Users/rositariarusesta/Desktop/Tugas Spasial Pertemuan 12/Jabar2/Jabar2.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 26 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 106.3705 ymin: -7.823398 xmax: 108.8338 ymax: -5.91377
CRS:           NA
data_jabar <- read_excel("/Users/rositariarusesta/Desktop/Tugas Spasial Pertemuan 12/data_jabar.xlsx")
data_jabar$KABKOTNO <- as.character(data_jabar$KABKOTNO)
jabar <- shp_jabar %>%
  left_join(data_jabar, by = "KABKOTNO") %>%
  na.omit()
colnames(jabar)
 [1] "PROVNO.x"   "KABKOTNO"   "KODE2010.x" "PROVINSI.x" "KABKOT.x"  
 [6] "SUMBER"     "IDSP2010.x" "PROVNO.y"   "KODE2010.y" "PROVINSI.y"
[11] "KABKOT.y"   "IDSP2010.y" "Long"       "Lat"        "p.miskin15"
[16] "p.miskin16" "j.miskin15" "j.miskin16" "AHH2015"    "AHH2016"   
[21] "EYS2015"    "EYS2016"    "MYS2015"    "MYS2016"    "EXP2015"   
[26] "EXP2016"    "APM.SD15"   "APM.SMP15"  "APM.SMA15"  "APM.PT15"  
[31] "APK.SD15"   "APK.SMP15"  "APK.SMA15"  "APK.PT15"   "APS.USIA15"
[36] "APS.USIA2"  "APS.USIA3"  "APS.USIA4"  "geometry"  

a. Matriks Pembobot

# =====================================================
# SPATIAL WEIGHT
# =====================================================
nb_jabar <- suppressWarnings(poly2nb(jabar))

lw_jabar <- nb2listw(
  nb_jabar,
  style = "W",
  zero.policy = TRUE
)

b. Penentuan Variabel (Indeks Moran (+) tertinggi)

# =========================================================
# ANALISIS MORAN'S I RESIDUAL
# MENCARI KOMBINASI X TERBAIK
# DENGAN P-VALUE TERKECIL
# =========================================================

# =========================================================
# 5. DAFTAR VARIABEL
# =========================================================
variabel_list <- c(
  "p.miskin15",
  "AHH2015",
  "EYS2015",
  "MYS2015",
  "EXP2015",
  "APM.SD15",
  "APM.SMP15",
  "APM.SMA15",
  "APM.PT15",
  "APK.SD15",
  "APK.SMP15",
  "APK.SMA15",
  "APK.PT15",
  "APS.USIA15",
  "APS.USIA2",
  "APS.USIA3",
  "APS.USIA4"
)

# =========================================================
# 6. TENTUKAN VARIABEL Y
# =========================================================
variabel_Y <- "p.miskin15"

# variabel kandidat X
variabel_X <- setdiff(
  variabel_list,
  variabel_Y
)

# =========================================================
# 7. DATAFRAME HASIL
# =========================================================
hasil_model <- data.frame(
  Formula = character(),
  Jumlah_X = numeric(),
  Moran_I = numeric(),
  P_Value = numeric(),
  stringsAsFactors = FALSE
)

# =========================================================
# 8. LOOP KOMBINASI VARIABEL
# =========================================================

maksimum_X <- 6

for (k in 1:maksimum_X) {
  # membuat kombinasi
  kombinasi_x <- combn(
    variabel_X,
    k,
    simplify = FALSE
  )
  
  # loop tiap kombinasi
  for (x_set in kombinasi_x) {
    
    tryCatch({
      
      # =====================================================
      # MEMBUAT FORMULA
      # =====================================================
      formula_text <- paste(
        variabel_Y,
        "~",
        paste(x_set, collapse = " + ")
      )
      
      rumus <- as.formula(formula_text)
      
      # =====================================================
      # MODEL OLS
      # =====================================================
      model_ols <- lm(
        rumus,
        data = jabar
      )
      
      # =====================================================
      # MORAN TEST RESIDUAL
      # =====================================================
      uji_moran <- lm.morantest(
        model_ols,
        lw_jabar,
        zero.policy = TRUE
      )
      
      # =====================================================
      # AMBIL NILAI
      # =====================================================
      moran_i <- as.numeric(
        uji_moran$estimate["Observed Moran I"]
      )
      
      p_val <- uji_moran$p.value
      
      # =====================================================
      # SIMPAN JIKA SIGNIFIKAN
      # =====================================================
      if (p_val < 0.05) {
        
        hasil_model <- rbind(
          hasil_model,
          
          data.frame(
            Formula = formula_text,
            Jumlah_X = k,
            Moran_I = moran_i,
            P_Value = p_val
          )
        )
      }
      
    }, error = function(e) {
      
      # abaikan error
      NULL
      
    })
  }
}

# =========================================================
# 9. URUTKAN HASIL
# =========================================================
hasil_model <- hasil_model %>%
  arrange(P_Value)


# =========================================================
# 11. MODEL TERBAIK
# P-VALUE PALING KECIL
# =========================================================


if (nrow(hasil_model) == 0) {
  
  cat("Tidak ada model signifikan.\n")
  
} else {
  
  # ambil model terbaik
  model_terbaik <- hasil_model %>%
    slice(1)
  
  cat(
    "Formula              : ",
    as.character(model_terbaik$Formula),
    "\n",
    sep = ""
  )
  
  cat(
    "Jumlah Variabel X    : ",
    model_terbaik$Jumlah_X,
    "\n",
    sep = ""
  )
  
  cat(
    "Moran's I            : ",
    round(model_terbaik$Moran_I, 5),
    "\n",
    sep = ""
  )
  
  cat(
    "P-Value              : ",
    format(
      model_terbaik$P_Value,
      scientific = FALSE
    ),
    "\n",
    sep = ""
  )
}
Formula              : p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + APK.PT15 + APS.USIA15
Jumlah Variabel X    : 5
Moran's I            : 0.5012
P-Value              : 0.005018003

c. OLS Regression

jabar.lm <- lm(p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + APK.PT15 + APS.USIA15, data=jabar)

d. Uji Lagrange Multiplier

jabar.lagrange <- lm.LMtests(jabar.lm, lw_jabar, test=c("LMerr","RLMerr","LMlag","RLMlag","SARMA"))
Please update scripts to use lm.RStests in place of lm.LMtests
summary(jabar.lagrange)
    Rao's score (a.k.a Lagrange multiplier) diagnostics for spatial
    dependence
data:  
model: lm(formula = p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 +
APK.PT15 + APS.USIA15, data = jabar)
test weights: listw
 
         statistic parameter  p.value   
RSerr      7.23955         1 0.007132 **
adjRSerr   5.48939         1 0.019132 * 
RSlag      2.01129         1 0.156133   
adjRSlag   0.26113         1 0.609346   
SARMA      7.50067         2 0.023510 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

e. Spatial Error Model

jabar.err <- errorsarlm(p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + APK.PT15 + APS.USIA15,data=jabar,lw_jabar, zero.policy = TRUE)
summary(jabar.err)

Call:errorsarlm(formula = p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + 
    APK.PT15 + APS.USIA15, data = jabar, listw = lw_jabar, zero.policy = TRUE)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.96720 -0.97706  0.17825  0.76731  1.45103 

Type: error 
Regions with no neighbours included:
 18 19 21 25 26 
Coefficients: (asymptotic standard errors) 
               Estimate  Std. Error z value  Pr(>|z|)
(Intercept) -425.458866   84.564213 -5.0312 4.874e-07
EYS2015        4.311729    1.035451  4.1641 3.126e-05
APM.SD15      -1.521207    0.226462 -6.7173 1.852e-11
APK.SD15       0.640932    0.101930  6.2880 3.216e-10
APK.PT15      -0.358322    0.069408 -5.1626 2.436e-07
APS.USIA15     4.680477    0.828208  5.6513 1.592e-08

Lambda: 0.9059, LR test value: 23.088, p-value: 1.5476e-06
Asymptotic standard error: 0.060606
    z-value: 14.947, p-value: < 2.22e-16
Wald statistic: 223.43, p-value: < 2.22e-16

Log likelihood: -28.18863 for error model
ML residual variance (sigma squared): 1.0473, (sigma: 1.0234)
Number of observations: 17 
Number of parameters estimated: 8 
AIC: 72.377, (AIC for lm: 93.465)

f. SARMA

jabar.sarma <- sacsarlm(p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + APK.PT15 + APS.USIA15,lw_jabar, data=jabar, zero.policy = TRUE)
summary(jabar.sarma)

Call:sacsarlm(formula = p.miskin15 ~ EYS2015 + APM.SD15 + APK.SD15 + 
    APK.PT15 + APS.USIA15, data = jabar, listw = lw_jabar, zero.policy = TRUE)

Residuals:
      Min        1Q    Median        3Q       Max 
-1.837342 -0.900247 -0.050073  0.685523  1.606525 

Type: sac 
Coefficients: (asymptotic standard errors) 
               Estimate  Std. Error z value  Pr(>|z|)
(Intercept) -442.074285   83.148799 -5.3167 1.057e-07
EYS2015        4.793132    1.140231  4.2037 2.626e-05
APM.SD15      -1.707710    0.293038 -5.8276 5.623e-09
APK.SD15       0.672837    0.103147  6.5231 6.886e-11
APK.PT15      -0.392055    0.076727 -5.1097 3.226e-07
APS.USIA15     4.939024    0.838588  5.8897 3.869e-09

Rho: 0.20377
Asymptotic standard error: 0.19972
    z-value: 1.0202, p-value: 0.30761
Lambda: 0.91256
Asymptotic standard error: 0.065071
    z-value: 14.024, p-value: < 2.22e-16

LR test value: 23.918, p-value: 6.4015e-06

Log likelihood: -27.77358 for sac model
ML residual variance (sigma squared): 0.97176, (sigma: 0.98578)
Number of observations: 17 
Number of parameters estimated: 9 
AIC: 73.547, (AIC for lm: 93.465)
AICc(jabar.lm)
[1] 105.9096
AICc(jabar.err)
[1] 90.37725
AICc(jabar.sarma)
[1] 99.26145
lrtest1<-anova(jabar.err,jabar.lm)
lrtest1
          Model df    AIC  logLik Test L.Ratio    p-value
jabar.err     1  8 72.377 -28.189    1                   
jabar.lm      2  7 93.465 -39.733    2  23.088 1.5476e-06
lrtest2<-anova(jabar.sarma,jabar.err)
lrtest2
            Model df    AIC  logLik Test L.Ratio p-value
jabar.sarma     1  9 73.547 -27.774    1                
jabar.err       2  8 72.377 -28.189    2 0.83008 0.36225