rm(list = ls())
### From CRAN
if(!require(pacman)) install.packages("pacman")
pacman::p_load("tidyverse", "raster", "sp", "sf",
"scales", "ggsn", "dbscan", "rgdal",
"RSQLite", "plotly", "mapview", "leaflet",
"spdep", "spatialreg", "lmtest")
### From Github
if (!require("rspatial")) remotes::install_github('rspatial/rspatial');
library(rspatial)
Data Spatial dapat diunduh disini https://gadm.org/. Data untuk analisis dapat diunduh disini https://sumut.bps.go.id/.
Persentase Penduduk Miskin Menurut Kabupaten Kota di SUmatera Utara (Persen), 2015
## Package sf
sumut_map <- st_read("shape/indonesia.shp/indonesia.shp",
layer = "indonesia") %>%
subset(FIRST_PROV=="SUMATERA UTARA") %>%
janitor::clean_names() %>%
rename(kabupaten_kota = first_kabk_1)
Reading layer `indonesia' from data source `C:\Users\Ghozy Haqqoni\Documents\!Kuliah\Semester_6_DS\GIS\gis_midterm2\UAS_GIS\shape\indonesia.shp\indonesia.shp' using driver `ESRI Shapefile'
Simple feature collection with 497 features and 6 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 95.00971 ymin: -11.00762 xmax: 141.0194 ymax: 6.07694
epsg (SRID): 4326
proj4string: +proj=longlat +datum=WGS84 +no_defs
sumut_map %>% plot()
#list kota/kab
sumut_map %>%
dplyr::select(kabupaten_kota, kodekab) %>%
as_tibble() %>%
head(5)
## Package rgdal
sumut_ogr <- readOGR("shape/indonesia.shp/indonesia.shp",
layer = "indonesia") %>%
subset(FIRST_PROV=="SUMATERA UTARA")
OGR data source with driver: ESRI Shapefile
Source: "C:\Users\Ghozy Haqqoni\Documents\!Kuliah\Semester_6_DS\GIS\gis_midterm2\UAS_GIS\shape\indonesia.shp\indonesia.shp", layer: "indonesia"
with 497 features
It has 6 fields
Integer64 fields read as strings: COUNT
tpt laju pertumbuhan ekonomi angka melek huruf rt2 lama sekolah
data_sumut <- readxl::read_xlsx("SumutData.xlsx", sheet = 1)
data_sumut <- data_sumut %>%
as_tibble() %>%
janitor::clean_names() %>%
mutate_at("kabupaten_kota", toupper) %>%
mutate(kodekab = c(1201:1225, 1271:1278)) %>%
rename(kemiskinan_percent = persentase_penduduk_miskin_persen,
tpt = tingkat_pengangguran_terbuka_penduduk_umur_15_tahun_keatas_persen,
ikk = indeks_kemahalan_konstruksi_kab_kota,
pdrb = pdrb_atas_dasar_harga_konstan_2010_milyar_rupiah,
rt2sekolah = rata_rata_lama_sekolah)
sumut_map2 <- data.frame(sumut_map, data_sumut)
data_sumut %>% arrange(desc(kemiskinan_percent))
sumut_f <- sumut_ogr %>%
fortify(region = "KODEKAB") %>%
mutate(id = as.numeric(id))
sumut_fnew <- sumut_f %>% drop_na()
#coordinates (head)
sumut_fnew %>%
select(long, lat) %>%
as_tibble() %>%
head(5)
sumut <- sumut_fnew %>%
inner_join(data_sumut, by = c("id" = "kodekab"))
theme_map <- function(...) {
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#61B8F2", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA),
panel.border = element_rect(colour = "black",
fill = NA,
size = 3),
...
)
}
(tematik <- ggplot(sumut) +
geom_polygon(aes(fill = kemiskinan_percent,
x = long,
y = lat,
group = group)) +
# municipality outline
geom_path(aes(x = long,
y = lat,
group = group),
color = "white", size = .05) +
coord_equal() +
# define colors
scale_fill_continuous(low = "#fff7ec",
high = "#277842")+
labs(title = "Peta Tematik Sumatera Utara",
subtitle = "Persentase Kemiskinan Penduduk Sumatera Utara 2015",
caption="Sumber: https://www.bps.go.id") +
scalebar(sumut, dist = 75, dist_unit = "km",
transform = TRUE, model = "WGS84",
location = "bottomright",
st.size = 3)+
north(sumut,location = "topright",symbol = 12)+
blank()+
theme_map())
mapviewOptions(default = TRUE)
mapview(sumut_map)
data_sumut %>% names()
[1] "kabupaten_kota" "kemiskinan_percent" "tpt" "ikk"
[5] "pdrb" "rt2sekolah" "kodekab"
## OLS
mod <- kemiskinan_percent ~ tpt + ikk + pdrb + rt2sekolah
model <- lm(mod, data_sumut)
summary(model)
Call:
lm(formula = mod, data = data_sumut)
Residuals:
Min 1Q Median 3Q Max
-6.4065 -2.2168 -0.2287 1.9499 10.1381
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.804e+00 1.508e+01 -0.518 0.608786
tpt 5.736e-01 2.650e-01 2.165 0.039103 *
ikk 3.894e-01 1.297e-01 3.004 0.005568 **
pdrb -2.681e-05 3.438e-05 -0.780 0.442083
rt2sekolah -2.340e+00 5.889e-01 -3.973 0.000452 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.076 on 28 degrees of freedom
Multiple R-squared: 0.5986, Adjusted R-squared: 0.5413
F-statistic: 10.44 on 4 and 28 DF, p-value: 2.643e-05
pdrb ga signifikan –> hapus tidak ada indikasi multikol (vif <5)
data_sumut <- data_sumut %>%
select(-c(pdrb))
mod <- kemiskinan_percent ~ tpt + ikk + rt2sekolah
model <- lm(mod, data_sumut)
summary(model)
Call:
lm(formula = mod, data = data_sumut)
Residuals:
Min 1Q Median 3Q Max
-7.1357 -2.2102 -0.3431 1.9384 10.1002
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -8.2384 14.9634 -0.551 0.586144
tpt 0.5340 0.2583 2.067 0.047757 *
ikk 0.3986 0.1283 3.108 0.004195 **
rt2sekolah -2.4074 0.5785 -4.162 0.000257 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.049 on 29 degrees of freedom
Multiple R-squared: 0.5899, Adjusted R-squared: 0.5475
F-statistic: 13.9 on 3 and 29 DF, p-value: 8.432e-06
## Cek adanya Multikolinearitas
cor <- data_sumut %>%
select(-c(kabupaten_kota, kemiskinan_percent, kodekab)) %>%
cor()
car::vif(model)
tpt ikk rt2sekolah
1.431351 1.346614 1.498316
cor %>% corrplot:::corrplot(method="color",
type="lower",
order="hclust",
addCoef.col = "black",
tl.col="black",
tl.offset = T, tl.srt = 45,
insig = "blank",
diag=FALSE)
residu <- resid(model) #Mengambil nilai residu dari persamaan
data.frame(residu) %>%
ggplot2::ggplot(aes(residu))+
geom_density(color="black", fill="lightgreen") +
labs(title="Distribusi Residu Normal",x="x", y = "Density") +
theme_minimal() #plot bukti bahwa residu berdistribusi normal
shapiro.test(residu)
Shapiro-Wilk normality test
data: residu
W = 0.97397, p-value = 0.5971
acf(residu)
lmtest::dwtest(model)
Durbin-Watson test
data: model
DW = 1.6726, p-value = 0.1405
alternative hypothesis: true autocorrelation is greater than 0
plot(residu, col = "#009999", lwd=7,
main = "Residual Model")
#check wheterthe residual random or not
sumut_ogr@data <- cbind(sumut_ogr@data, data_sumut)
sumut_ogr@data <- sumut_ogr@data %>% select(-kodekab)
sumut_ogr$residuals<-residuals(model)
sumut_ogr %>% class()
[1] "SpatialPolygonsDataFrame"
attr(,"package")
[1] "sp"
(nb <- poly2nb(sumut_ogr))
Neighbour list object:
Number of regions: 33
Number of nonzero links: 112
Percentage nonzero weights: 10.28466
Average number of links: 3.393939
sumut_ogr@data %>% head(5)
par(mai=c(0,0,0,0))
plot(sumut_ogr, col=muted("green"))
xy <- coordinates(sumut_ogr)
points(xy, cex=6, pch=20, col='white')
text(sumut_ogr, 'FIRST_KABK', cex=.75)
# ?text
par(mai=c(0,0,0,0))
plot(sumut_ogr)
plot(nb, coordinates(sumut_ogr), col='red',
lwd=5, add=TRUE)
#We can use the neighbour list object to get the average value for the neighbors of each polygon.
resnb<- sapply(nb, function(x) mean(sumut_ogr$residuals[x]))
cor(sumut_ogr$residuals, resnb)
[1] 0.2882114
plot(sumut_ogr$residuals, resnb,
xlab = "Residuals",
ylab = "Mean adjacent residuals",
main = "Resiual Plot")
plot(density(sumut_ogr$residuals),
main = "Residuals Distribution")
lw<-nb2listw(nb)
lw
Characteristics of weights list object:
Neighbour list object:
Number of regions: 33
Number of nonzero links: 112
Percentage nonzero weights: 10.28466
Average number of links: 3.393939
Weights style: W
Weights constants summary:
moran(sumut_ogr$residuals, lw, zero.policy = T,
n = length(lw$neighbours), S0 = Szero(lw))
$I
[1] 0.1926855
$K
[1] 3.226565
#jika tidak signifikan stop
set.seed(Sys.Date())
moran.test(sumut_ogr$residuals, lw)
Moran I test under randomisation
data: sumut_ogr$residuals
weights: lw
Moran I statistic standard deviate = 1.6518, p-value = 0.04928
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
0.19268551 -0.03125000 0.01837867
moran.mc(sumut_ogr$residuals, lw, nsim = 9999)
Monte-Carlo simulation of Moran I
data: sumut_ogr$residuals
weights: lw
number of simulations + 1: 10000
statistic = 0.19269, observed rank = 9381, p-value = 0.0619
alternative hypothesis: greater
plotMoran <- function(data, weight){
moran.plot(data$residuals, weight)
}
plotMoran(sumut_ogr, lw)
lm.LMtests(model, lw, test = c("LMerr", "LMlag"))
Lagrange multiplier diagnostics for spatial dependence
data:
model: lm(formula = mod, data = data_sumut)
weights: lw
LMerr = 1.774, df = 1, p-value = 0.1829
Lagrange multiplier diagnostics for spatial dependence
data:
model: lm(formula = mod, data = data_sumut)
weights: lw
LMlag = 6.2673, df = 1, p-value = 0.0123
LM lag sign di 5% aplha –> SAR
SAR <- spatialreg::lagsarlm(mod,
data = sumut_ogr,
lw,
tol.solve=1.0e-15)
SAR %>% summary()
Call:spatialreg::lagsarlm(formula = mod, data = sumut_ogr, listw = lw, tol.solve = 1e-15)
Residuals:
Min 1Q Median 3Q Max
-6.104528 -1.371283 -0.044266 1.137193 9.150259
Type: lag
Coefficients: (asymptotic standard errors)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.81639 11.87739 -0.3213 0.74797
tpt 0.36454 0.21134 1.7249 0.08454
ikk 0.19550 0.10860 1.8003 0.07182
rt2sekolah -1.25318 0.51282 -2.4437 0.01454
Rho: 0.49943, LR test value: 8.3411, p-value: 0.0038758
Asymptotic standard error: 0.13605
z-value: 3.6709, p-value: 0.00024173
Wald statistic: 13.475, p-value: 0.00024173
Log likelihood: -86.66976 for lag model
ML residual variance (sigma squared): 10.325, (sigma: 3.2133)
Number of observations: 33
Number of parameters estimated: 6
AIC: 185.34, (AIC for lm: 191.68)
LM test for residual autocorrelation
test value: 1.0388, p-value: 0.3081
sumut_ogr$residuals <- residuals(SAR)
moran.mc(sumut_ogr$residuals, lw, nsim = 9999)
Monte-Carlo simulation of Moran I
data: sumut_ogr$residuals
weights: lw
number of simulations + 1: 10000
statistic = -0.07678, observed rank = 3773, p-value = 0.6227
alternative hypothesis: greater
plotMoran(sumut_ogr, lw)
insignificant -> autokorelasi solved dengan model SAR