library(plm)
library(spdep)
library(readxl)
library(tidyverse)
library(dplyr)
library(tidyr)
library(DT)
library(ggplot2)
library(corrplot)
library(sf) #spatial
library(gridExtra)
library(dlookr) #data descriptive (univar_numeric)
library(ggrepel) #mean median in time series
library(imputeTS)
library(psych)
library(ggtext)
library(MASS)
library(ggpubr) #ggdensity dan ggqqplot
library(lmtest)
library(stargazer)
library(caret)
library(reshape2) #melt matriks bobot menjadi dataframe
library(sp)
library(kableExtra)
library(splm)
library(plm) #no spatial
library(nortest) #anderson darling assumption test
data <- read_excel("/Users/Dicky-Jr.DataAnalyst/Desktop/2023 Kolokium/data_penelitian.xlsx", sheet = "Sheet4")
data <- data %>%
pivot_longer(cols = -Provinsi,
names_to = c(".value", "Tahun"),
names_pattern = "(\\w+)_(\\d{4})")
data
## # A tibble: 238 × 9
## Provinsi Tahun Y X1 X2 X3 X4 X5 X6
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 2016 26.4 38.0 54.4 74.5 3.48 0.448 2.86
## 2 ACEH 2017 35.7 49.8 62.1 70.6 2.98 0.448 3.62
## 3 ACEH 2018 37.1 33.3 70.9 70.7 2.84 0.42 4.53
## 4 ACEH 2019 34.2 49.8 64 70.0 2.64 0.528 4.36
## 5 ACEH 2020 17.2 65.4 68.0 70.1 2.72 0.514 4.33
## 6 ACEH 2021 33.2 66.7 69.3 74.4 2.86 0.503 4.29
## 7 ACEH 2022 31.2 65.9 68.0 70.7 2.49 0.504 6.83
## 8 SUMATERA UTARA 2016 24.4 33.5 64.0 69.7 1.77 0.455 3.47
## 9 SUMATERA UTARA 2017 28.5 50.9 63.6 67.2 1.71 0.439 4.19
## 10 SUMATERA UTARA 2018 32.4 25.7 71.9 68.3 1.56 0.41 3.8
## # ℹ 228 more rows
population_data <- read_excel("/Users/Dicky-Jr.DataAnalyst/Desktop/2023 Kolokium/data_penelitian.xlsx", sheet = "Jumlah balita (0-4 tahun)")
tail(population_data)
## # A tibble: 6 × 8
## Provinsi `2016` `2017` `2018` `2019` `2020` `2021` `2022`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GORONTALO 110965 111773 112523 113200 96307 97119 97911
## 2 SULAWESI BARAT 146737 148496 150143 151664 138394 139181 140197
## 3 MALUKU NA NA NA 206863 159084 160943 162763
## 4 MALUKU UTARA 144215 147064 149896 152711 63452 121967 123232
## 5 PAPUA BARAT 100232 102702 105182 100798 99676 101741 103471
## 6 PAPUA 331600 333019 334219 335253 411808 409159 409358
# Impute missing values using Kalman filtering
imputed_data <- na_kalman(population_data)
population_data <- imputed_data
population_data
## # A tibble: 34 × 8
## Provinsi `2016` `2017` `2018` `2019` `2020` `2021` `2022`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 517269. 568765. 568942 567911 490875 490810 4.92e5
## 2 SUMATERA UTARA 788721 780652 772571 764534 1380042 1375839 1.38e6
## 3 SUMATERA BARAT 545359 542819 540083 537142 456701 456976 4.59e5
## 4 RIAU 752271 733491 739071 743920 610635 612354 6.17e5
## 5 JAMBI 323535 322571 321508 320370 313535 312643 3.13e5
## 6 SUMATERA SELATAN 806001 802114 792985 797591 771284 759987 7.51e5
## 7 BENGKULU 1904793 1934269 1963300 1991838 175561 175513 1.76e5
## 8 LAMPUNG 788255 777676 766872 755887 744815 769955 7.67e5
## 9 KEP. BANGKA BELITUNG 31030 31670 27296 30698 26787 27154 2.76e4
## 10 KEP. RIAU 219096 218574 217246 215244 190488 191988 1.95e5
## # ℹ 24 more rows
data$Y <- data$Y / 100
data$X1 <- data$X1 / 100
data$X2 <- data$X2 / 100
data$X3 <- data$X3 / 100
describe(data)
## vars n mean sd median trimmed mad min max range skew
## Provinsi* 1 238 17.50 9.83 17.50 17.50 12.60 1.00 34.00 33.00 0.00
## Tahun* 2 238 4.00 2.00 4.00 4.00 2.97 1.00 7.00 6.00 0.00
## Y 3 238 0.27 0.07 0.27 0.27 0.06 0.07 0.44 0.37 -0.17
## X1 4 238 0.59 0.13 0.59 0.59 0.13 0.21 0.81 0.60 -0.39
## X2 5 238 0.74 0.12 0.78 0.76 0.08 0.20 0.93 0.73 -1.67
## X3 6 238 0.62 0.11 0.63 0.62 0.09 0.27 0.90 0.63 -0.27
## X4 7 238 1.96 1.46 1.53 1.69 0.99 0.46 9.37 8.91 2.05
## X5 8 238 0.46 0.09 0.48 0.47 0.06 0.12 0.67 0.55 -1.32
## X6 9 238 5.17 1.65 4.89 5.08 1.67 1.56 10.21 8.65 0.50
## kurtosis se
## Provinsi* -1.22 0.64
## Tahun* -1.26 0.13
## Y 0.20 0.00
## X1 -0.33 0.01
## X2 3.50 0.01
## X3 0.76 0.01
## X4 5.00 0.09
## X5 1.94 0.01
## X6 -0.21 0.11
ggplot(data = data, aes(x = Tahun, y = X1, group = Provinsi, color = Provinsi)) +
geom_line() +
geom_point() +
labs(title = "Prevalence of nutritional status categorized as short in children aged 0-59 months",
x = "Year",
y = "Prevalence of nutritional status") +
theme_minimal()
ggplot(data, aes(x = Tahun, y = Y, fill = Tahun)) +
geom_violin(trim = FALSE, scale = "width", draw_quantiles = c(0.25, 0.5, 0.75)) +
geom_boxplot(width = 0.1, color = "black", fill = "white", outlier.shape = NA) + # Add boxplots for medians
labs(title = "Violin Plot for Stunting Prevalences in Indonesia across Years",
x = "Year",
y = "Y") +
theme_minimal() +
scale_fill_brewer(palette = "Set4") # Adjust color palette
## Warning: Unknown palette: "Set4"
vars <- c("Y", "X1", "X2", "X3", "X4", "X5", "X6")
data_subset <- data[, vars]
# Mengganti nama variabel
colnames(data_subset)[2:7] <- c("REB", "PHH", "ECR", "PGI", "GII", "UNH")
# Membuat data dalam format long
data_long <- gather(data_subset, key = "Variable", value = "Value", -Y)
# Plotting menggunakan facet_wrap untuk 2 baris dan 3 kolom
ggplot(data_long, aes(x = Y, y = Value)) +
geom_point(color = "goldenrod3") + # Mengatur warna titik
facet_wrap(~ Variable, scales = "free", nrow = 2, ncol = 3) +
labs(title = "Hubungan antara Y dengan X1-X6",
x = "Dependent Variables",
y = "Values") +
theme_light()
summary_table <- data %>%
group_by(Tahun) %>%
summarise(Median = median(Y),
Mean = mean(Y),
SD = sd(Y),
Min = min(Y),
Max = max(Y))
print(summary_table)
## # A tibble: 7 × 6
## Tahun Median Mean SD Min Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 0.267 0.275 0.0531 0.192 0.397
## 2 2017 0.306 0.306 0.0563 0.191 0.403
## 3 2018 0.312 0.303 0.0530 0.176 0.427
## 4 2019 0.279 0.280 0.0631 0.144 0.438
## 5 2020 0.257 0.249 0.0818 0.073 0.430
## 6 2021 0.253 0.252 0.0563 0.109 0.378
## 7 2022 0.226 0.229 0.0613 0.08 0.353
ggplot(summary_table, aes(x = Tahun)) +
geom_point(aes(y = Median), color = "darkorange", size = 5, shape = 16) +
geom_point(aes(y = Mean), color = "darkorange4", size = 5, shape = 18) +
geom_errorbar(aes(y = Mean, ymin = Mean - SD, ymax = Mean + SD), width = 0.2, color = "darkolivegreen", size = 1, linetype = "dotted") +
geom_text_repel(aes(y = Median, label = "Median"), color = "darkorange", size = 3) +
geom_text_repel(aes(y = Mean, label = "Mean"), color = "darkorange4", size = 3) +
labs(title = "Plot Variabel Y (Prevalensi Stunting)",
x = "Period",
y = "Stunting Prevalences") +
theme_light() +
theme(legend.position = "bottom") +
scale_color_manual(values = c("darkorange", "darkorange4"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Notably, both the Median and Mean values exhibit a downward trend over the years, suggesting an overall improvement in child nutrition. However, the higher standard deviation (SD) values, particularly in 2019 and 2022, indicate increased variability in stunting rates among provinces. The anomalies observed in 2018, where both Mean and Median values show a significant increase, warrant further investigation to identify the factors contributing to this spike in stunting rates. These insights underscore the importance of considering both central tendency and variability in assessing the effectiveness of interventions and policies aimed at addressing child malnutrition in Indonesia.
pairs.panels(data[, c("Y", "X1", "X2", "X3", "X4", "X5", "X6")],
method = "pearson", hist.col = "darkgoldenrod", col = "orange", density=TRUE)
vars_of_interest <- c("Y", "X1", "X2", "X3", "X4", "X5", "X6")
# Create histograms for each variable
histograms <- lapply(vars_of_interest, function(var) {
ggplot(data, aes_string(x = var)) +
geom_histogram(fill = "darkgoldenrod", color = "orange") +
labs(title = var) +
theme_minimal()
})
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Arrange histograms in a grid
gridExtra::grid.arrange(grobs = histograms, ncol = 3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ks_test <- ks.test(data$Y, "pnorm", mean(data$Y), sd(data$Y))
## Warning in ks.test.default(data$Y, "pnorm", mean(data$Y), sd(data$Y)): ties
## should not be present for the Kolmogorov-Smirnov test
ks_test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: data$Y
## D = 0.043513, p-value = 0.7583
## alternative hypothesis: two-sided
p-value > 0.05, artinya peubah dependen (tingkat prevalensi stunting) sudah mengikuti sebaran normal (tidak tolak HO)
# Create QQ plot
qqplot <- ggqqplot(data$Y, color = "goldenrod2") +
theme_light()
print(qqplot)
# Create histogram for variable Y
histogram_Y <- ggplot(data, aes(x = Y)) +
geom_histogram(fill = "darkgoldenrod", color = "darkgoldenrod") +
labs(title = "Histogram of Y") +
theme_light()
print(histogram_Y)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Check for normality visually by adding a density curve
density_Y <- ggplot(data, aes(x = Y)) +
geom_histogram(aes(y = ..density..), fill = "darkgoldenrod", color = "orange", alpha = 0.7) +
geom_density(color = "red", size = 1) +
labs(title = "Histogram of Y with Density Curve") +
theme_light()
print(density_Y)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
predictor_vars <- c("X1", "X2", "X3", "X4", "X5", "X6")
# Initialize empty vectors to store correlation coefficients and p-values
correlation_coefficients <- numeric(length(predictor_vars))
p_values <- numeric(length(predictor_vars))
# Loop through each predictor variable and calculate correlation coefficients and p-values
for (i in seq_along(predictor_vars)) {
cor_test <- cor.test(data$Y, data[[predictor_vars[i]]])
correlation_coefficients[i] <- cor_test$estimate
p_values[i] <- cor_test$p.value
}
results_df <- data.frame(
Prediktor = predictor_vars,
Korelasi = round(correlation_coefficients, 3),
Sig = round(p_values, 10))
print(results_df)
## Prediktor Korelasi Sig
## 1 X1 -0.183 0.0046588237
## 2 X2 -0.288 0.0000061671
## 3 X3 -0.529 0.0000000000
## 4 X4 0.335 0.0000001155
## 5 X5 0.407 0.0000000001
## 6 X6 0.432 0.0000000000
vif_model <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
vif_values <- car::vif(vif_model)
print(vif_values)
## X1 X2 X3 X4 X5 X6
## 1.021524 1.850711 1.823666 1.652282 1.390280 1.257463
VIF<10, Tidak ada indikasi multikolinearitas yang signifikan dalam model regresi Anda, karena semua nilai VIF berada dalam rentang yang dapat diterima
vif_table <- data.frame()
for (year in unique(data$Tahun)) {
data_year <- subset(data, year == Tahun)
vif_model1 <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data_year)
vif_values <- car::vif(vif_model1)
vif_table <- rbind(vif_table, c(year, vif_values))
}
colnames(vif_table) <- c("Year", names(vif_values))
print(vif_table)
## Year X1 X2 X3 X4
## 1 2016 1.12740980734304 2.09271300530027 1.47098103530659 1.89661048643167
## 2 2017 1.12564094370397 2.55818648406934 2.06821695605387 1.71996739441261
## 3 2018 1.14997631696569 1.50216903883032 2.28557100549039 1.48300102385845
## 4 2019 1.04935387079359 2.37197661519288 1.93375716645917 2.3319739551057
## 5 2020 1.31933638188567 2.21875357627972 2.33342317495787 1.87634970464966
## 6 2021 1.14915003389386 2.3712719894185 2.28540161012724 1.85119961249292
## 7 2022 1.24360278365602 2.56383475278028 2.06676317819729 2.42350679515741
## X5 X6
## 1 1.64686313476231 1.19141837574843
## 2 1.50513676526255 1.49731319334749
## 3 1.66859953167215 1.45997191812989
## 4 1.5136723906638 1.35697574689578
## 5 2.08744657595567 1.45640691952426
## 6 1.5758374624917 1.44056501946028
## 7 1.38937537259112 1.49598382774393
#spatial data preparation
provinsi_sf <- st_read("/Users/Dicky-Jr.DataAnalyst/Desktop/2023 Kolokium/BATAS_PROVINSI_DESEMBER_2019_DUKCAPIL_bersihh.shp")
## Reading layer `BATAS_PROVINSI_DESEMBER_2019_DUKCAPIL_bersihh' from data source
## `/Users/Dicky-Jr.DataAnalyst/Desktop/2023 Kolokium/BATAS_PROVINSI_DESEMBER_2019_DUKCAPIL_bersihh.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 95.01079 ymin: -11.00762 xmax: 141.0194 ymax: 6.07693
## Geodetic CRS: WGS 84
provinsi_sf$PROVINSI
## [1] "ACEH" "BALI"
## [3] "BANTEN" "BENGKULU"
## [5] "DAERAH ISTIMEWA YOGYAKARTA" "DKI JAKARTA"
## [7] "GORONTALO" "JAMBI"
## [9] "JAWA BARAT" "JAWA TENGAH"
## [11] "JAWA TIMUR" "KALIMANTAN BARAT"
## [13] "KALIMANTAN SELATAN" "KALIMANTAN TENGAH"
## [15] "KALIMANTAN TIMUR" "KALIMANTAN UTARA"
## [17] "KEPULAUAN BANGKA BELITUNG" "KEPULAUAN RIAU"
## [19] "LAMPUNG" "MALUKU"
## [21] "MALUKU UTARA" "NUSA TENGGARA BARAT"
## [23] "NUSA TENGGARA TIMUR" "PAPUA"
## [25] "PAPUA BARAT" "RIAU"
## [27] "SULAWESI BARAT" "SULAWESI SELATAN"
## [29] "SULAWESI TENGAH" "SULAWESI TENGGARA"
## [31] "SULAWESI UTARA" "SUMATERA BARAT"
## [33] "SUMATERA SELATAN" "SUMATERA UTARA"
province_mapping <- c("ACEH" = "Aceh",
"BALI" = "Bali",
"BANTEN" = "Banten",
"BENGKULU" = "Bengkulu",
"DAERAH ISTIMEWA YOGYAKARTA" = "Special Region of Yogyakarta",
"DKI JAKARTA" = "Jakarta",
"GORONTALO" = "Gorontalo",
"JAMBI" = "Jambi",
"JAWA BARAT" = "West Java",
"JAWA TENGAH" = "Central Java",
"JAWA TIMUR" = "East Java",
"KALIMANTAN BARAT" = "West Kalimantan",
"KALIMANTAN SELATAN" = "South Kalimantan",
"KALIMANTAN TENGAH" = "Central Kalimantan",
"KALIMANTAN TIMUR" = "East Kalimantan",
"KALIMANTAN UTARA" = "North Kalimantan",
"KEPULAUAN BANGKA BELITUNG" = "Bangka Belitung Islands",
"KEPULAUAN RIAU" = "Riau Islands",
"LAMPUNG" = "Lampung",
"MALUKU" = "Maluku",
"MALUKU UTARA" = "North Maluku",
"NUSA TENGGARA BARAT" = "West Nusa Tenggara",
"NUSA TENGGARA TIMUR" = "East Nusa Tenggara",
"PAPUA" = "Papua",
"PAPUA BARAT" = "West Papua",
"RIAU" = "Riau",
"SULAWESI BARAT" = "West Sulawesi",
"SULAWESI SELATAN" = "South Sulawesi",
"SULAWESI TENGAH" = "Central Sulawesi",
"SULAWESI TENGGARA" = "Southeast Sulawesi",
"SULAWESI UTARA" = "North Sulawesi",
"SUMATERA BARAT" = "West Sumatra",
"SUMATERA SELATAN" = "South Sumatra",
"SUMATERA UTARA" = "North Sumatra")
# Replace native province names with English names
provinsi_sf <- provinsi_sf %>%
mutate(PROVINSI = case_when(
PROVINSI %in% names(province_mapping) ~ province_mapping[PROVINSI],
TRUE ~ as.character(PROVINSI)
))
data <- data %>%
mutate(
Provinsi = case_when(
Provinsi == "ACEH" ~ "Aceh",
Provinsi == "BALI" ~ "Bali",
Provinsi == "BANTEN" ~ "Banten",
Provinsi == "BENGKULU" ~ "Bengkulu",
Provinsi == "DI YOGYAKARTA" ~ "Special Region of Yogyakarta",
Provinsi == "DKI JAKARTA" ~ "Jakarta",
Provinsi == "GORONTALO" ~ "Gorontalo",
Provinsi == "JAMBI" ~ "Jambi",
Provinsi == "JAWA BARAT" ~ "West Java",
Provinsi == "JAWA TENGAH" ~ "Central Java",
Provinsi == "JAWA TIMUR" ~ "East Java",
Provinsi == "KALIMANTAN BARAT" ~ "West Kalimantan",
Provinsi == "KALIMANTAN SELATAN" ~ "South Kalimantan",
Provinsi == "KALIMANTAN TENGAH" ~ "Central Kalimantan",
Provinsi == "KALIMANTAN TIMUR" ~ "East Kalimantan",
Provinsi == "KALIMANTAN UTARA" ~ "North Kalimantan",
Provinsi == "KEP. BANGKA BELITUNG" ~ "Bangka Belitung Islands",
Provinsi == "KEP. RIAU" ~ "Riau Islands",
Provinsi == "LAMPUNG" ~ "Lampung",
Provinsi == "MALUKU" ~ "Maluku",
Provinsi == "MALUKU UTARA" ~ "North Maluku",
Provinsi == "NUSA TENGGARA BARAT" ~ "West Nusa Tenggara",
Provinsi == "NUSA TENGGARA TIMUR" ~ "East Nusa Tenggara",
Provinsi == "PAPUA" ~ "Papua",
Provinsi == "PAPUA BARAT" ~ "West Papua",
Provinsi == "RIAU" ~ "Riau",
Provinsi == "SULAWESI BARAT" ~ "West Sulawesi",
Provinsi == "SULAWESI SELATAN" ~ "South Sulawesi",
Provinsi == "SULAWESI TENGAH" ~ "Central Sulawesi",
Provinsi == "SULAWESI TENGGARA" ~ "Southeast Sulawesi",
Provinsi == "SULAWESI UTARA" ~ "North Sulawesi",
Provinsi == "SUMATERA BARAT" ~ "West Sumatra",
Provinsi == "SUMATERA SELATAN" ~ "South Sumatra",
Provinsi == "SUMATERA UTARA" ~ "North Sumatra",
TRUE ~ Provinsi
)
)
data$Tahun <- as.character(data$Tahun)
# Function to merge data for a given year
merge_spatial_data <- function(year, data, provinsi_sf) {
data_year <- data %>% filter(Tahun == year)
merged_data <- merge(provinsi_sf, data_year, by.x = "PROVINSI", by.y = "Provinsi")
return(merged_data)
}
# Merge data for each year
spatial_data_2016 <- merge_spatial_data("2016", data, provinsi_sf)
spatial_data_2017 <- merge_spatial_data("2017", data, provinsi_sf)
spatial_data_2018 <- merge_spatial_data("2018", data, provinsi_sf)
spatial_data_2019 <- merge_spatial_data("2019", data, provinsi_sf)
spatial_data_2020 <- merge_spatial_data("2020", data, provinsi_sf)
spatial_data_2021 <- merge_spatial_data("2021", data, provinsi_sf)
spatial_data_2022 <- merge_spatial_data("2022", data, provinsi_sf)
spatial_data_2016$PROVINSI <- as.character(spatial_data_2016$PROVINSI)
spatial_data_2017$PROVINSI <- as.character(spatial_data_2017$PROVINSI)
spatial_data_2018$PROVINSI <- as.character(spatial_data_2018$PROVINSI)
spatial_data_2019$PROVINSI <- as.character(spatial_data_2019$PROVINSI)
spatial_data_2020$PROVINSI <- as.character(spatial_data_2020$PROVINSI)
spatial_data_2021$PROVINSI <- as.character(spatial_data_2021$PROVINSI)
spatial_data_2022$PROVINSI <- as.character(spatial_data_2022$PROVINSI)
spas.2016 <- data.frame(Provinsi = spatial_data_2016$PROVINSI, Y = spatial_data_2016$Y)
spas.2017 <- data.frame(Provinsi = spatial_data_2017$PROVINSI, Y = spatial_data_2017$Y)
spas.2018 <- data.frame(Provinsi = spatial_data_2018$PROVINSI, Y = spatial_data_2018$Y)
spas.2019 <- data.frame(Provinsi = spatial_data_2019$PROVINSI, Y = spatial_data_2019$Y)
spas.2020 <- data.frame(Provinsi = spatial_data_2020$PROVINSI, Y = spatial_data_2020$Y)
spas.2021 <- data.frame(Provinsi = spatial_data_2021$PROVINSI, Y = spatial_data_2021$Y)
spas.2022 <- data.frame(Provinsi = spatial_data_2022$PROVINSI, Y = spatial_data_2022$Y)
centroids <- st_centroid(provinsi_sf)
## Warning: st_centroid assumes attributes are constant over geometries
coords <- st_coordinates(centroids) #extract coordinates from centroids
row.names(coords) <- provinsi_sf$PROVINSI
coords_longlat <- coords[, ]
# Ubah koordinat dalam format longlat
coords_longlat[, 1] <- coords[, 1] # Tidak perlu perubahan pada koordinat X (longitude)
coords_longlat[, 2] <- coords[, 2] # Tidak perlu perubahan pada koordinat Y (latitude)
k3 <- knn2nb(knearneigh(coords_longlat, k = 3))
W <- nb2mat(k3)
dlist2 <- mat2listw(W, style = "W")
moran_results <- list()
# Loop untuk menjalankan uji Moran I untuk setiap tahun
for (year in 2016:2022) {
spatial_data <- get(paste0("spas.", year))
moran <- moran.test(spatial_data$Y, dlist2, randomisation = FALSE)
moran_results[[as.character(year)]] <- c(Moran_I = moran$estimate, p_value = moran$p.value)
}
tahun <- c()
moran_i <- c()
p_value <- c()
# Loop melalui hasil uji Moran's I untuk setiap tahun
for (year in 2016:2022) {
tahun <- c(tahun, year)
moran_i <- c(moran_i, moran_results[[as.character(year)]]["Moran_I.Moran I statistic"])
p_value <- c(p_value, moran_results[[as.character(year)]]["p_value"])
}
moran_df <- data.frame(Tahun = tahun, Moran_I_Statistic = moran_i, p_value = p_value)
print(moran_df)
## Tahun Moran_I_Statistic p_value
## 1 2016 -0.081790862 0.6640137
## 2 2017 -0.059800066 0.5958374
## 3 2018 -0.071971864 0.6340840
## 4 2019 -0.071845554 0.6336932
## 5 2020 0.054840081 0.2418926
## 6 2021 -0.009052419 0.4306311
## 7 2022 0.078341494 0.1857931
k <- 3
knear <- knearneigh(coords, k)
knear_nb <- knn2nb(knear, row.names = row.names(coords))
segments <- NULL
for (i in 1:length(provinsi_sf$PROVINSI)) {
for (j in knear_nb[[i]]) {
segment <- data.frame(
x = coords[i, 1], y = coords[i, 2],
xend = coords[j, 1], yend = coords[j, 2]
)
segments <- rbind(segments, segment)
}
}
#perbaiki format penulisan nama setiap provinsi
provinsi_sf <- st_sf(cbind(provinsi_sf, st_coordinates(centroids)))
capitalize_first <- function(text) {
return(str_to_title(tolower(text)))
}
provinsi_sf$PROVINSI <- sapply(provinsi_sf$PROVINSI, capitalize_first)
ggplot(data = provinsi_sf) +
geom_sf(fill = "white", color = "black") +
geom_sf(data = centroids, color = "red", size = 3) +
geom_segment(data = segments, aes(x = x, y = y, xend = xend, yend = yend), color = "goldenrod") +
geom_text(aes(x = X, y = Y, label = PROVINSI), size = 2, color = "black", check_overlap = TRUE) +
theme_classic() +
labs(title = "K-nearest Neighbors (K=3) untuk Provinsi di Indonesia")
w1 <- as.matrix(1/dist(coords_longlat))
dlist1 = mat2listw(w1, style = "W")
moran_results2 <- list()
# Loop untuk menjalankan uji Moran I untuk setiap tahun
for (year in 2016:2022) {
spatial_data <- get(paste0("spas.", year))
moran <- moran.test(spatial_data$Y, dlist1, randomisation = FALSE)
moran_results2[[as.character(year)]] <- c(Moran_I = moran$estimate, p_value = moran$p.value)
}
tahun <- c()
moran_i <- c()
p_value <- c()
for (year in 2016:2022) {
tahun <- c(tahun, year)
moran_i <- c(moran_i, moran_results2[[as.character(year)]]["Moran_I.Moran I statistic"])
p_value <- c(p_value, moran_results2[[as.character(year)]]["p_value"])
}
moran_df2 <- data.frame(Tahun = tahun, Moran_I_Statistic = moran_i, p_value = p_value)
print(moran_df2)
## Tahun Moran_I_Statistic p_value
## 1 2016 -0.0506850400 0.7136596
## 2 2017 -0.0441156714 0.6488765
## 3 2018 -0.0425933429 0.6331304
## 4 2019 -0.0452818417 0.6607697
## 5 2020 -0.0081594223 0.2699835
## 6 2021 -0.0208535567 0.3968416
## 7 2022 -0.0009030577 0.2079097
alpha=2
w3<-exp((-alpha)*as.matrix(dist(coords_longlat))) #dinormalisasi
diag(w3)<-0
rtot<-rowSums(w3,na.rm=TRUE)
w3.sd<-w3/rtot #row-normalized
dlist3 <- mat2listw(w3.sd, style = "W")
moran_results3 <- list()
# Loop untuk menjalankan uji Moran I untuk setiap tahun
for (year in 2016:2022) {
# Ambil data spasial untuk tahun tertentu
spatial_data <- get(paste0("spas.", year))
# Jalankan uji Moran I
moran <- moran.test(spatial_data$Y, dlist3, randomisation = FALSE)
# Simpan hasil uji Moran I dalam list
moran_results3[[as.character(year)]] <- c(Moran_I = moran$estimate, p_value = moran$p.value)
}
tahun <- c()
moran_i <- c()
p_value <- c()
# Loop melalui hasil uji Moran's I untuk setiap tahun
for (year in 2016:2022) {
tahun <- c(tahun, year)
moran_i <- c(moran_i, moran_results3[[as.character(year)]]["Moran_I.Moran I statistic"])
p_value <- c(p_value, moran_results3[[as.character(year)]]["p_value"])
}
moran_df3 <- data.frame(Tahun = tahun, Moran_I_Statistic = moran_i, p_value = p_value)
print(moran_df3)
## Tahun Moran_I_Statistic p_value
## 1 2016 -0.13056239 0.71653431
## 2 2017 -0.10481434 0.66477508
## 3 2018 -0.09957581 0.65380498
## 4 2019 -0.08070477 0.61326617
## 5 2020 0.26293538 0.04699938
## 6 2021 -0.07837289 0.60815932
## 7 2022 0.07851191 0.26715602
cem_model <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "pooling")
fem_model <- plm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, index = c("Provinsi", "Tahun"), data = data, model = "within", effect = "time")
rem_model <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "random")
summary(fem_model)
## Oneway (time) effect Within Model
##
## Call:
## plm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, effect = "time",
## model = "within", index = c("Provinsi", "Tahun"))
##
## Balanced Panel: n = 34, T = 7, N = 238
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.166824 -0.031332 0.001200 0.029734 0.114450
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0496021 0.0326773 1.5179 0.130434
## X2 -0.0383813 0.0375170 -1.0230 0.307389
## X3 -0.0997874 0.0365787 -2.7280 0.006875 **
## X4 0.0058398 0.0026323 2.2185 0.027521 *
## X5 0.1601647 0.0386196 4.1472 4.770e-05 ***
## X6 0.0146979 0.0020823 7.0584 2.067e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.87099
## Residual Sum of Squares: 0.44657
## R-Squared: 0.48728
## Adj. R-Squared: 0.45994
## F-statistic: 35.6395 on 6 and 225 DF, p-value: < 2.22e-16
coefficients <- coef(fem_model)
coefficients
## X1 X2 X3 X4 X5 X6
## 0.049602110 -0.038381277 -0.099787433 0.005839778 0.160164675 0.014697901
fem_model_time <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "within", effect="time")
rem_model_time <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "random", effect="time", index = c("Provinsi","Tahun"), random.method = "walhus")
fem_model_twoways <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "within", effect="twoways")
rem_model_twoways <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "random", effect="twoways", index = c("Provinsi","Tahun"), random.method = "walhus")
fem_model_individual <- plm(Y ~ X1 + X3 + X4 + X5 + X6, data = data, model = "within", effect="individual")
summary_fem_twoways <- summary(fem_model_twoways)
summary_fem_time <- summary(fem_model_time)
summary_fem_individual <- summary(fem_model_individual)
summary_fem_twoways
## Twoways effects Within Model
##
## Call:
## plm(formula = Y ~ X1 + X3 + X4 + X5 + X6, data = data, effect = "twoways",
## model = "within")
##
## Balanced Panel: n = 34, T = 7, N = 238
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1459867 -0.0141809 0.0014491 0.0186557 0.0711934
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 -0.03837771 0.04344448 -0.8834 0.3781
## X3 0.05626712 0.06600355 0.8525 0.3950
## X4 -0.01925531 0.00835157 -2.3056 0.0222 *
## X5 0.03367907 0.08078521 0.4169 0.6772
## X6 -0.00038569 0.00286680 -0.1345 0.8931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.22208
## Residual Sum of Squares: 0.21466
## R-Squared: 0.033435
## Adj. R-Squared: -0.18692
## F-statistic: 1.33524 on 5 and 193 DF, p-value: 0.25096
summary_fem_time
## Oneway (time) effect Within Model
##
## Call:
## plm(formula = Y ~ X1 + X3 + X4 + X5 + X6, data = data, effect = "time",
## model = "within")
##
## Balanced Panel: n = 34, T = 7, N = 238
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.16427190 -0.03025906 0.00060866 0.02986823 0.11990787
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0530145 0.0325100 1.6307 0.104343
## X3 -0.1099174 0.0352167 -3.1212 0.002036 **
## X4 0.0072360 0.0022511 3.2144 0.001498 **
## X5 0.1650161 0.0383313 4.3050 2.489e-05 ***
## X6 0.0140202 0.0019743 7.1012 1.590e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.87099
## Residual Sum of Squares: 0.44865
## R-Squared: 0.4849
## Adj. R-Squared: 0.45982
## F-statistic: 42.5493 on 5 and 226 DF, p-value: < 2.22e-16
summary_fem_individual
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = Y ~ X1 + X3 + X4 + X5 + X6, data = data, effect = "individual",
## model = "within")
##
## Balanced Panel: n = 34, T = 7, N = 238
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1555365 -0.0179042 0.0013552 0.0228221 0.0718460
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 -0.19521577 0.02863800 -6.8167 1.087e-10 ***
## X3 -0.04925891 0.06576370 -0.7490 0.4547
## X4 -0.00927817 0.00839342 -1.1054 0.2703
## X5 -0.07396479 0.07574118 -0.9765 0.3300
## X6 -0.00042312 0.00272843 -0.1551 0.8769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.38696
## Residual Sum of Squares: 0.27962
## R-Squared: 0.27739
## Adj. R-Squared: 0.1394
## F-statistic: 15.278 on 5 and 199 DF, p-value: 1.056e-12
pooltest(cem_model,fem_model_twoways)
##
## F statistic
##
## data: Y ~ X1 + X3 + X4 + X5 + X6
## F = 8.5169, df1 = 39, df2 = 193, p-value < 2.2e-16
## alternative hypothesis: unstability
phtest(rem_model,fem_model)
##
## Hausman Test
##
## data: Y ~ X1 + X3 + X4 + X5 + X6
## chisq = 31.061, df = 5, p-value = 9.11e-06
## alternative hypothesis: one model is inconsistent
Output di atas menguji perbandingan antara model rem individu, waktu, dan dua arah dengan model fem individu, fem waktu, dan fem efek dua arah. Dari ketiga output semuanya memberikan hasil yaitu p-value < α(5%) maka tolak H0. Artinya penggunaan model FEM lebih baik diterapkan terhadap data daripada model REM.
fem.twoway <- plm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6,
data = data, model = "within", effect= "twoways", index = c("Provinsi","Tahun"))
plmtest(fem.twoway,type = "bp", effect="twoways" )
##
## Lagrange Multiplier Test - two-ways effects (Breusch-Pagan)
##
## data: Y ~ X1 + X2 + X3 + X4 + X5 + X6
## chisq = 121.09, df = 2, p-value < 2.2e-16
## alternative hypothesis: significant effects
plmtest(fem.twoway,type = "bp", effect="individual" )
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: Y ~ X1 + X2 + X3 + X4 + X5 + X6
## chisq = 57.441, df = 1, p-value = 3.483e-14
## alternative hypothesis: significant effects
plmtest(fem.twoway,type = "bp", effect="time" )
##
## Lagrange Multiplier Test - time effects (Breusch-Pagan)
##
## data: Y ~ X1 + X2 + X3 + X4 + X5 + X6
## chisq = 63.65, df = 1, p-value = 1.486e-15
## alternative hypothesis: significant effects
pcdtest(fem.twoway, test = c("lm"), index=NULL,w =NULL )
##
## Breusch-Pagan LM test for cross-sectional dependence in panels
##
## data: Y ~ X1 + X2 + X3 + X4 + X5 + X6
## chisq = 791.5, df = 561, p-value = 4.265e-10
## alternative hypothesis: cross-sectional dependence
pcdtest(fem_model_time, test = c("lm")) #Breusch-Pagan LM test for cross-sectional dependence
##
## Breusch-Pagan LM test for cross-sectional dependence in panels
##
## data: Y ~ X1 + X3 + X4 + X5 + X6
## chisq = 830.2, df = 561, p-value = 9.474e-13
## alternative hypothesis: cross-sectional dependence
pcdtest(fem_model_time, test = c("cd")) #Pesaran CD test for cross-sectional dependence
##
## Pesaran CD test for cross-sectional dependence in panels
##
## data: Y ~ X1 + X3 + X4 + X5 + X6
## z = -0.75516, p-value = 0.4502
## alternative hypothesis: cross-sectional dependence
pbgtest(fem_model_time) #Breusch-Godfrey/Wooldridge test for serial correlation
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: Y ~ X1 + X3 + X4 + X5 + X6
## chisq = 38.741, df = 7, p-value = 2.19e-06
## alternative hypothesis: serial correlation in idiosyncratic errors
#function for lM and Robust-LM Lag and Error Tests
tests <- c("lml", "rlml", "lme", "rlme")
dlists <- list(dlist1 = dlist1, dlist2 = dlist2, dlist3 = dlist3)
output_list <- list()
for (test in tests) {
for (dlist_name in names(dlists)) {
dlist <- dlists[[dlist_name]]
result <- slmtest(Y ~ X1 + X2 + X3 + X4 + X5 + X6,
data=data,
listw=dlist,
test=test,
model="within",
effects="time")
output_name <- paste("Test:", test, ", Listw:", dlist_name)
output_list[[output_name]] <- result
}
}
for (output_name in names(output_list)) {
cat("Output:", output_name, "\n")
print(output_list[[output_name]])
}
## Output: Test: lml , Listw: dlist1
##
## LM test for spatial lag dependence
##
## data: formula (within transformation)
## LM = 72.373, df = 1, p-value < 2.2e-16
## alternative hypothesis: spatial lag dependence
##
## Output: Test: lml , Listw: dlist2
##
## LM test for spatial lag dependence
##
## data: formula (within transformation)
## LM = 26.577, df = 1, p-value = 2.533e-07
## alternative hypothesis: spatial lag dependence
##
## Output: Test: lml , Listw: dlist3
##
## LM test for spatial lag dependence
##
## data: formula (within transformation)
## LM = 25.615, df = 1, p-value = 4.167e-07
## alternative hypothesis: spatial lag dependence
##
## Output: Test: rlml , Listw: dlist1
##
## Locally robust LM test for spatial lag dependence sub spatial error
##
## data: formula (within transformation)
## LM = 21.002, df = 1, p-value = 4.588e-06
## alternative hypothesis: spatial lag dependence
##
## Output: Test: rlml , Listw: dlist2
##
## Locally robust LM test for spatial lag dependence sub spatial error
##
## data: formula (within transformation)
## LM = 20.327, df = 1, p-value = 6.527e-06
## alternative hypothesis: spatial lag dependence
##
## Output: Test: rlml , Listw: dlist3
##
## Locally robust LM test for spatial lag dependence sub spatial error
##
## data: formula (within transformation)
## LM = 20.354, df = 1, p-value = 6.435e-06
## alternative hypothesis: spatial lag dependence
##
## Output: Test: lme , Listw: dlist1
##
## LM test for spatial error dependence
##
## data: formula (within transformation)
## LM = 52.209, df = 1, p-value = 4.991e-13
## alternative hypothesis: spatial error dependence
##
## Output: Test: lme , Listw: dlist2
##
## LM test for spatial error dependence
##
## data: formula (within transformation)
## LM = 13.619, df = 1, p-value = 0.0002239
## alternative hypothesis: spatial error dependence
##
## Output: Test: lme , Listw: dlist3
##
## LM test for spatial error dependence
##
## data: formula (within transformation)
## LM = 14.685, df = 1, p-value = 0.0001271
## alternative hypothesis: spatial error dependence
##
## Output: Test: rlme , Listw: dlist1
##
## Locally robust LM test for spatial error dependence sub spatial lag
##
## data: formula (within transformation)
## LM = 0.83783, df = 1, p-value = 0.36
## alternative hypothesis: spatial error dependence
##
## Output: Test: rlme , Listw: dlist2
##
## Locally robust LM test for spatial error dependence sub spatial lag
##
## data: formula (within transformation)
## LM = 7.3691, df = 1, p-value = 0.006635
## alternative hypothesis: spatial error dependence
##
## Output: Test: rlme , Listw: dlist3
##
## Locally robust LM test for spatial error dependence sub spatial lag
##
## data: formula (within transformation)
## LM = 9.4236, df = 1, p-value = 0.002142
## alternative hypothesis: spatial error dependence
lmoran.2016 <- localmoran(spas.2016$Y, dlist1) %>% as.data.frame()
lmoran.2017 <- localmoran(spas.2017$Y, dlist1) %>% as.data.frame()
lmoran.2018 <- localmoran(spas.2018$Y, dlist1) %>% as.data.frame()
lmoran.2019 <- localmoran(spas.2019$Y, dlist1) %>% as.data.frame()
lmoran.2020 <- localmoran(spas.2020$Y, dlist1) %>% as.data.frame()
lmoran.2021 <- localmoran(spas.2021$Y, dlist1) %>% as.data.frame()
lmoran.2022 <- localmoran(spas.2022$Y, dlist1) %>% as.data.frame()
provinsi_sf$scale.data.2018 <- scale(spas.2018$Y) %>% as.vector()
provinsi_sf$lag.data.2018 <- lag.listw(dlist2, provinsi_sf$scale.data.2018)
provinsi_sf$quad_sig.2018 <- NA
provinsi_sf[(provinsi_sf$scale.data.2018 >= 0 &
provinsi_sf$lag.data.2018 >= 0 ), "quad_sig.2018"] <- "High-High"
provinsi_sf[(provinsi_sf$scale.data.2018 <= 0 &
provinsi_sf$lag.data.2018 <= 0 ), "quad_sig.2018"] <- "Low-Low"
provinsi_sf[(provinsi_sf$scale.data.2018 >= 0 &
provinsi_sf$lag.data.2018 <= 0 ), "quad_sig.2018"] <- "High-Low"
provinsi_sf[(provinsi_sf$scale.data.2018 <= 0 &
provinsi_sf$lag.data.2018 >= 0 ), "quad_sig.2018"] <- "Low-High"
lmoran.2018$quad_sig.2018 = provinsi_sf$quad_sig.2018
x1 <- provinsi_sf$scale.data.2018
moran.plot(x1, dlist2, labels= provinsi_sf$PROVINSI)
#nilai signifikansi
provinsi_sf$sig.2018 <- NA
#high-high
provinsi_sf[(lmoran.2018$`Pr(z != E(Ii))` <= 0.05), "sig.2018"] <- "Signifikan"
#low-low
provinsi_sf[(lmoran.2018$`Pr(z != E(Ii))` > 0.05), "sig.2018"] <- "Tidak Signifikan"
lmoran.2018$sig.2018 = provinsi_sf$sig.2018
write.table(lmoran.2018, "local moran 2018.csv")
plot.indo.2018 <- ggplot(data = provinsi_sf) +
geom_sf(mapping = aes(fill = quad_sig.2018)) +
geom_sf_text(aes(label = PROVINSI), size = 2) +
scale_fill_manual(values = c("blue", "cyan", "yellow", "red")) + # Adjust color values according to your categories
labs(title = "Plot 2018 dengan Pembobot 3-NN", fill = "category") +
theme_minimal()
plot.indo.2018
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
Suatu daerah memiliki hubungan spasial high–high (H-H) dengan daerah sekitarnya jika memiliki nilai LISA yang sama tinggi dan signifikan. Hal ini berarti daerah yang mempunyai jumlah penderita tinggi berdekatan dengan daerah yang memiliki jumlah penderita yang tinggi juga
plot.indo.sig.2018 = ggplot(provinsi_sf)+
geom_sf(mapping=aes(fill=sig.2018))+
geom_sf_text(aes(label=PROVINSI), size=2)+
scale_fill_manual(values= c("red", "gold"))+
labs(title = "Plot Signifikansi 2018 dengan Pembobot 3-NN",
fill="signifikansi")+
theme_minimal()
plot.indo.sig.2018
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
provinsi_sf$scale.data.2020 <- scale(spas.2020$Y) %>% as.vector()
provinsi_sf$lag.data.2020 <- lag.listw(dlist2, provinsi_sf$scale.data.2020)
provinsi_sf$quad_sig.2020 <- NA
provinsi_sf[(provinsi_sf$scale.data.2020 >= 0 &
provinsi_sf$lag.data.2020 >= 0 ), "quad_sig.2020"] <- "High-High"
provinsi_sf[(provinsi_sf$scale.data.2020 <= 0 &
provinsi_sf$lag.data.2020 <= 0 ), "quad_sig.2020"] <- "Low-Low"
provinsi_sf[(provinsi_sf$scale.data.2020 >= 0 &
provinsi_sf$lag.data.2020 <= 0 ), "quad_sig.2020"] <- "High-Low"
provinsi_sf[(provinsi_sf$scale.data.2020 <= 0 &
provinsi_sf$lag.data.2020 >= 0 ), "quad_sig.2020"] <- "Low-High"
lmoran.2020$quad_sig.2020 = provinsi_sf$quad_sig.2020
x1 <- provinsi_sf$scale.data.2020
moran.plot(x1, dlist2, labels= provinsi_sf$PROVINSI)
#nilai signifikansi
provinsi_sf$sig.2020 <- NA
#high-high
provinsi_sf[(lmoran.2020$`Pr(z != E(Ii))` <= 0.05), "sig.2020"] <- "Significant"
#low-low
provinsi_sf[(lmoran.2020$`Pr(z != E(Ii))` > 0.05), "sig.2020"] <- "Not Significant"
lmoran.2020$sig.2020 = provinsi_sf$sig.2020
write.table(lmoran.2020, "local moran 2020.csv")
plot.indo.2020 <- ggplot(data = provinsi_sf) +
geom_sf(mapping = aes(fill = quad_sig.2020)) +
geom_sf_text(aes(label = PROVINSI), size = 2) +
scale_fill_manual(values = c("blue", "cyan", "yellow", "red")) +
labs(title = "Plot 2020 with 3-NN Matrix Weight", fill = "Category") +
theme_minimal()
plot.indo.2020
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
plot.indo.sig.2020 = ggplot(provinsi_sf)+
geom_sf(mapping=aes(fill=sig.2020))+
geom_sf_text(aes(label=PROVINSI), size=2)+
scale_fill_manual(values= c("darkgrey", "red3"))+
labs(title = "Significant Plot for Stunting Prevalences in 2020",
fill="Significant or Not")+
theme_classic()
plot.indo.sig.2020
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
provinsi_sf$scale.data.2022 <- scale(spas.2022$Y) %>% as.vector()
provinsi_sf$lag.data.2022 <- lag.listw(dlist2, provinsi_sf$scale.data.2022)
provinsi_sf$quad_sig.2022 <- NA
provinsi_sf[(provinsi_sf$scale.data.2022 >= 0 &
provinsi_sf$lag.data.2022 >= 0 ), "quad_sig.2022"] <- "High-High"
provinsi_sf[(provinsi_sf$scale.data.2022 <= 0 &
provinsi_sf$lag.data.2022 <= 0 ), "quad_sig.2022"] <- "Low-Low"
provinsi_sf[(provinsi_sf$scale.data.2022 >= 0 &
provinsi_sf$lag.data.2022 <= 0 ), "quad_sig.2022"] <- "High-Low"
provinsi_sf[(provinsi_sf$scale.data.2022 <= 0 &
provinsi_sf$lag.data.2022 >= 0 ), "quad_sig.2022"] <- "Low-High"
lmoran.2022$quad_sig.2022 = provinsi_sf$quad_sig.2022
x1 <- provinsi_sf$scale.data.2022
moran.plot(x1, dlist2, labels= provinsi_sf$PROVINSI)
#nilai signifikansi
provinsi_sf$sig.2022 <- NA
#high-high
provinsi_sf[(lmoran.2022$`Pr(z != E(Ii))` <= 0.05), "sig.2022"] <- "Significant"
#low-low
provinsi_sf[(lmoran.2022$`Pr(z != E(Ii))` > 0.05), "sig.2022"] <- "Not Significant"
lmoran.2022$sig.2022 = provinsi_sf$sig.2022
write.table(lmoran.2022, "local moran 2022.csv")
plot.indo.2022 <- ggplot(data = provinsi_sf) +
geom_sf(mapping = aes(fill = quad_sig.2022)) +
geom_sf_text(aes(label = PROVINSI), size = 2) +
scale_fill_manual(values = c("blue", "cyan", "yellow", "red")) +
labs(title = "Plot 2022 with 3-NN Matrix Weight", fill = "Category") +
theme_minimal()
plot.indo.2022
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
plot.indo.sig.2022 = ggplot(provinsi_sf)+
geom_sf(mapping=aes(fill=sig.2022))+
geom_sf_text(aes(label=PROVINSI), size=2)+
scale_fill_manual(values= c("gold", "red"))+
labs(title = "Significant Plot for Stunting Prevalences in 2022",
fill="Significant or Not")+
theme_minimal()
plot.indo.sig.2022
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
sar_fe1 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist1, lag=TRUE, model="within", effect="time", spatial.error = "none")
sar_fe2 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist2, lag=TRUE, model="within", effect="time", spatial.error = "none")
sar_fe3 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist3, lag=TRUE, model="within", effect="time", spatial.error = "none")
summary(sar_fe1)
## Spatial panel fixed effects lag model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist1, model = "within", effect = "time", lag = TRUE,
## spatial.error = "none")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.16856904 -0.02968755 0.00069641 0.02919307 0.11186119
##
## Spatial autoregressive coefficient:
## Estimate Std. Error t-value Pr(>|t|)
## lambda -0.19648 0.20667 -0.9507 0.3418
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0506562 0.0317187 1.5970 0.110256
## X2 -0.0362648 0.0364652 -0.9945 0.319977
## X3 -0.0977561 0.0354930 -2.7542 0.005883 **
## X4 0.0059434 0.0025540 2.3271 0.019958 *
## X5 0.1629540 0.0374576 4.3504 1.359e-05 ***
## X6 0.0144037 0.0020368 7.0716 1.531e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sar_fe2) #best model
## Spatial panel fixed effects lag model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist2, model = "within", effect = "time", lag = TRUE,
## spatial.error = "none")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.16481046 -0.03046805 0.00051759 0.02909263 0.11631860
##
## Spatial autoregressive coefficient:
## Estimate Std. Error t-value Pr(>|t|)
## lambda 0.075472 0.071795 1.0512 0.2932
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0465681 0.0318220 1.4634 0.143360
## X2 -0.0398155 0.0364479 -1.0924 0.274660
## X3 -0.1038726 0.0356644 -2.9125 0.003585 **
## X4 0.0057706 0.0025582 2.2558 0.024086 *
## X5 0.1552170 0.0375613 4.1324 3.591e-05 ***
## X6 0.0149315 0.0020341 7.3406 2.126e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sar_fe3)
## Spatial panel fixed effects lag model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist3, model = "within", effect = "time", lag = TRUE,
## spatial.error = "none")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1615060 -0.0302665 0.0011039 0.0307863 0.1155925
##
## Spatial autoregressive coefficient:
## Estimate Std. Error t-value Pr(>|t|)
## lambda 0.050432 0.051213 0.9847 0.3248
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0452946 0.0317931 1.4247 0.154253
## X2 -0.0407651 0.0364835 -1.1174 0.263842
## X3 -0.0981339 0.0355479 -2.7606 0.005769 **
## X4 0.0056837 0.0025716 2.2101 0.027095 *
## X5 0.1597173 0.0375907 4.2489 2.149e-05 ***
## X6 0.0149717 0.0020353 7.3559 1.896e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sar_fe2)$effects
## [1] "tpfe"
#SAR-FE Model with Time Effect - spatial time (2016 until 2022)
sar_fe2$res.eff[[1]]$res.tfe
## [,1]
## 1 0.010043916
## 2 0.027427766
## 3 0.041584142
## 4 0.002988781
## 5 -0.022644277
## 6 -0.008246288
## 7 -0.051154040
rsquare_sar1<-1-var(sar_fe1$resid)/var(data$Y)
rsquare_sar2<-1-var(sar_fe2$resid)/var(data$Y)
rsquare_sar3<-1-var(sar_fe3$resid)/var(data$Y)
rsquare_sar1
## [1] 0.5710826
rsquare_sar2
## [1] 0.5711605
rsquare_sar3
## [1] 0.571152
aic_sar1 <- -2 * sar_fe1$logLik + 2 * length(coefficients(sar_fe1))
aic_sar2 <- -2 * sar_fe2$logLik + 2 * length(coefficients(sar_fe2))
aic_sar3 <- -2 * sar_fe3$logLik + 2 * length(coefficients(sar_fe3))
bic_sar1 <- -2 * sar_fe1$logLik + length(coefficients(sar_fe1)) * log(length(data$Y))
bic_sar2 <- -2 * sar_fe2$logLik + length(coefficients(sar_fe2)) * log(length(data$Y))
bic_sar3 <- -2 * sar_fe3$logLik + length(coefficients(sar_fe3)) * log(length(data$Y))
# Tampilkan nilai AIC
print(aic_sar1)
## [1] -805.5759
print(aic_sar2)
## [1] -805.7674
print(aic_sar3)
## [1] -805.8024
# Tampilkan nilai BIC
print(bic_sar1)
## [1] -781.27
print(bic_sar2)
## [1] -781.4615
print(bic_sar3)
## [1] -781.4965
sem_fe1 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist1, lag=FALSE, model="within", effect="time", spatial.error="b")
sem_fe2 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist2, lag=FALSE, model="within", effect="time", spatial.error="b")
sem_fe3 <- spml(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, listw = dlist3, lag=FALSE, model="within", effect="time", spatial.error="b")
summary(sem_fe1)
## Spatial panel fixed effects error model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist1, model = "within", effect = "time", lag = FALSE,
## spatial.error = "b")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.16722751 -0.03106910 0.00076035 0.02961288 0.11514058
##
## Spatial error parameter:
## Estimate Std. Error t-value Pr(>|t|)
## rho -0.36525 0.23660 -1.5437 0.1226
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0484621 0.0323254 1.4992 0.133823
## X2 -0.0391432 0.0361592 -1.0825 0.279021
## X3 -0.0957673 0.0354410 -2.7022 0.006889 **
## X4 0.0059911 0.0025948 2.3089 0.020951 *
## X5 0.1706242 0.0376867 4.5274 5.970e-06 ***
## X6 0.0144617 0.0020435 7.0768 1.476e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sem_fe2)
## Spatial panel fixed effects error model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist2, model = "within", effect = "time", lag = FALSE,
## spatial.error = "b")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1668191 -0.0312958 0.0012163 0.0296764 0.1144222
##
## Spatial error parameter:
## Estimate Std. Error t-value Pr(>|t|)
## rho 0.0055103 0.0857774 0.0642 0.9488
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0495600 0.0317486 1.5610 0.118521
## X2 -0.0384904 0.0364840 -1.0550 0.291427
## X3 -0.0998562 0.0355440 -2.8094 0.004964 **
## X4 0.0058308 0.0025575 2.2798 0.022618 *
## X5 0.1596700 0.0375283 4.2546 2.094e-05 ***
## X6 0.0147046 0.0020230 7.2685 3.634e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(sem_fe3)
## Spatial panel fixed effects error model
##
##
## Call:
## spml(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## listw = dlist3, model = "within", effect = "time", lag = FALSE,
## spatial.error = "b")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.1662245 -0.0313774 0.0012954 0.0294912 0.1137511
##
## Spatial error parameter:
## Estimate Std. Error t-value Pr(>|t|)
## rho 0.072491 0.060810 1.1921 0.2332
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## X1 0.0460818 0.0310923 1.4821 0.138314
## X2 -0.0371240 0.0363338 -1.0217 0.306900
## X3 -0.1024856 0.0352207 -2.9098 0.003617 **
## X4 0.0061681 0.0025324 2.4357 0.014864 *
## X5 0.1501541 0.0369287 4.0661 4.782e-05 ***
## X6 0.0147943 0.0020033 7.3850 1.525e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rsquare_sem1<-1-var(sem_fe1$resid)/var(data$Y)
rsquare_sem2<-1-var(sem_fe2$resid)/var(data$Y)
rsquare_sem3<-1-var(sem_fe3$resid)/var(data$Y)
rsquare_sem1
## [1] 0.5687053
rsquare_sem2
## [1] 0.5688908
rsquare_sem3
## [1] 0.5687191
aic_sem1 <- -2 * sem_fe1$logLik + 2 * length(coefficients(sem_fe1))
aic_sem2 <- -2 * sem_fe2$logLik + 2 * length(coefficients(sem_fe2))
aic_sem3 <- -2 * sem_fe3$logLik + 2 * length(coefficients(sem_fe3))
bic_sem1 <- -2 * sem_fe1$logLik + length(coefficients(sem_fe1)) * log(length(data$Y))
bic_sem2 <- -2 * sem_fe2$logLik + length(coefficients(sem_fe2)) * log(length(data$Y))
bic_sem3 <- -2 * sem_fe3$logLik + length(coefficients(sem_fe3)) * log(length(data$Y))
# Tampilkan nilai AIC
print(aic_sem1)
## [,1]
## [1,] -179.8075
print(aic_sem2)
## [,1]
## [1,] -177.8668
print(aic_sem3)
## [,1]
## [1,] -179.1582
# Tampilkan nilai BIC
print(bic_sem1)
## [,1]
## [1,] -155.5016
print(bic_sem2)
## [,1]
## [1,] -153.5609
print(bic_sem3)
## [,1]
## [1,] -154.8523
impacts_sar_fe2 <- impacts(sar_fe2, time= 2, listw = dlist1)
# Summarize impacts
summary_impacts <- summary(impacts_sar_fe2, zstats = TRUE, short = TRUE)
print(summary_impacts)
## Impact measures (lag, trace):
## Direct Indirect Total
## X1 0.046572286 0.0037973114 0.050369598
## X2 -0.039819023 -0.0032466783 -0.043065701
## X3 -0.103881841 -0.0084700953 -0.112351937
## X4 0.005771104 0.0004705519 0.006241656
## X5 0.155230844 0.0126568804 0.167887724
## X6 0.014932875 0.0012175648 0.016150440
## ========================================================
## Simulation results ( variance matrix):
## ========================================================
## Simulated standard errors
## Direct Indirect Total
## X1 0.033231527 0.0050992104 0.035988699
## X2 0.037112647 0.0048669925 0.040368964
## X3 0.038925303 0.0085946355 0.042408459
## X4 0.002654850 0.0004926758 0.002894912
## X5 0.037926961 0.0124009115 0.043697203
## X6 0.002034401 0.0010841872 0.002378315
##
## Simulated z-values:
## Direct Indirect Total
## X1 1.489769 0.7958507 1.488398
## X2 -1.180528 -0.7495529 -1.175671
## X3 -2.686271 -0.9989054 -2.668080
## X4 2.048881 0.8927693 2.030913
## X5 4.121153 1.0598961 3.877742
## X6 7.318574 1.1281130 6.774544
##
## Simulated p-values:
## Direct Indirect Total
## X1 0.1362850 0.42612 0.13664594
## X2 0.2377901 0.45352 0.23972656
## X3 0.0072254 0.31784 0.00762861
## X4 0.0404738 0.37198 0.04226378
## X5 3.7698e-05 0.28919 0.00010543
## X6 2.5069e-13 0.25927 1.248e-11
#Uji Asumsi kenormalan
galat1 <- sar_fe2$residuals
ks.test(galat1, "pnorm", mean=mean(galat1), sd=sd(galat1))
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: galat1
## D = 0.041941, p-value = 0.7966
## alternative hypothesis: two-sided
qqnorm(galat1)
qqline(galat1)
ad.test(galat1)
##
## Anderson-Darling normality test
##
## data: galat1
## A = 0.44293, p-value = 0.2848
residuals <- sar_fe2$residuals
residuals_squared <- residuals^2
breusch_pagan_model <- lm(residuals_squared ~ X1 + X2 + X3 + X5 + X6, data=data)
summary(breusch_pagan_model)
##
## Call:
## lm(formula = residuals_squared ~ X1 + X2 + X3 + X5 + X6, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0028039 -0.0015563 -0.0007473 0.0003958 0.0247655
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0013746 0.0023344 0.589 0.5565
## X1 -0.0008540 0.0015820 -0.540 0.5898
## X2 -0.0002137 0.0019308 -0.111 0.9120
## X3 0.0017878 0.0023839 0.750 0.4541
## X5 0.0040044 0.0024761 1.617 0.1072
## X6 -0.0003493 0.0001349 -2.589 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003067 on 232 degrees of freedom
## Multiple R-squared: 0.04035, Adjusted R-squared: 0.01967
## F-statistic: 1.951 on 5 and 232 DF, p-value: 0.08685