library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(plm)
## Warning: package 'plm' was built under R version 4.5.3
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(psych)
## Warning: package 'psych' was built under R version 4.5.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.2
#install.packages("kableExtra")
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.5.3
library(nortest)
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.5.3
data <- read_xlsx("Data_Panel_Pertumbuhan_Penduduk_Sumatera.xlsx", sheet = "Data Panel")

data$Tahun    <- as.factor(data$Tahun)
data$Provinsi <- as.factor(data$Provinsi)
data$Daerah   <- as.factor(data$Daerah)
data$PDRB_M   <- data$PDRB / 1000

cat("Jumlah observasi   :", nrow(data), "\n")
## Jumlah observasi   : 308
cat("Jumlah kab/kota    :", n_distinct(data$Daerah), "\n")
## Jumlah kab/kota    : 154
cat("Jumlah tahun       :", n_distinct(data$Tahun), "\n")
## Jumlah tahun       : 2
cat("Missing value      :", sum(is.na(data)), "\n")
## Missing value      : 0
vars <- c("LPP", "IPM", "PDRB_M", "IT")

desc <- data[, vars] |>
  describe() |>
  select(n, mean, sd, min, max, skew, kurtosis) |>
  round(3)

rownames(desc) <- c("LPP (%)", "IPM (Indeks)", "PDRB (Miliar Rp)", "IT (Jumlah Desa)")

kable(desc, caption = "Tabel 1. Statistik Deskriptif",
      col.names = c("N", "Mean", "Std.Dev", "Min", "Max", "Skewness", "Kurtosis")) |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Tabel 1. Statistik Deskriptif
N Mean Std.Dev Min Max Skewness Kurtosis
LPP (%) 308 1.390 0.565 -0.100 4.18 0.643 1.595
IPM (Indeks) 308 70.816 4.522 61.090 85.71 0.657 0.349
PDRB (Miliar Rp) 308 23004.069 33080.753 1211.352 254721.32 3.702 17.187
IT (Jumlah Desa) 308 123.435 98.436 7.000 678.00 2.112 6.483
ggplot(data, aes(x = LPP, fill = Tahun)) +
  geom_histogram(bins = 20, color = "white", alpha = 0.8, position = "dodge") +
  scale_fill_manual(values = c("#2196F3", "#FF9800")) +
  labs(title = "Distribusi Laju Pertumbuhan Penduduk",
       x = "LPP (%)", y = "Frekuensi", fill = "Tahun") +
  theme_minimal(base_size = 12)

ggplot(data, aes(x = reorder(Provinsi, LPP, median), y = LPP, fill = Provinsi)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21, outlier.color = "red") +
  coord_flip() +
  labs(title = "Sebaran LPP per Provinsi (2020–2021)", x = "Provinsi", y = "LPP (%)") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "none")

cor_matrix <- cor(data[, vars], use = "complete.obs")

ggcorrplot(cor_matrix, method = "circle", type = "lower", lab = TRUE, lab_size = 4,
           colors = c("#D32F2F", "white", "#1976D2"),
           title = "Matriks Korelasi Antar Variabel",
           ggtheme = theme_minimal())
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
##   Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

pdata <- pdata.frame(data, index = c("Daerah", "Tahun"))
pdim(pdata)
## Balanced Panel: n = 154, T = 2, N = 308
cem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "pooling")
summary(cem)
## Pooling Model
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, model = "pooling")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -1.4583e+00 -3.5811e-01 -7.7479e-05  2.8045e-01  2.8284e+00 
## 
## Coefficients:
##                Estimate  Std. Error t-value Pr(>|t|)   
## (Intercept)  8.9662e-01  5.4314e-01  1.6508 0.099809 . 
## IPM          8.5313e-03  7.6619e-03  1.1135 0.266385   
## PDRB_M       9.5793e-08  1.0462e-06  0.0916 0.927107   
## IT          -9.1208e-04  3.3051e-04 -2.7596 0.006138 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    98.013
## Residual Sum of Squares: 94.819
## R-Squared:      0.032586
## Adj. R-Squared: 0.02304
## F-statistic: 3.41332 on 3 and 304 DF, p-value: 0.017835
fem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "within", effect = "individual")
summary(fem)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, effect = "individual", 
##     model = "within")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -8.8516e-01 -1.3272e-01  5.1083e-15  1.3272e-01  8.8516e-01 
## 
## Coefficients:
##           Estimate  Std. Error t-value  Pr(>|t|)    
## IPM    -7.1721e-01  1.2625e-01 -5.6811 6.683e-08 ***
## PDRB_M  3.3863e-05  9.2007e-06  3.6804 0.0003235 ***
## IT     -3.2999e-03  1.8746e-03 -1.7604 0.0803682 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    15.578
## Residual Sum of Squares: 11.739
## R-Squared:      0.24645
## Adj. R-Squared: -0.53205
## F-statistic: 16.4619 on 3 and 151 DF, p-value: 2.6285e-09
rem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "random", effect = "individual")
summary(rem)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, effect = "individual", 
##     model = "random")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.07774 0.27882 0.256
## individual    0.22595 0.47534 0.744
## theta: 0.6169
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.8120291 -0.1916862 -0.0096102  0.1455514  1.6527822 
## 
## Coefficients:
##                Estimate  Std. Error z-value Pr(>|z|)   
## (Intercept)  1.3480e+00  7.5151e-01  1.7937 0.072867 . 
## IPM          2.5433e-03  1.0612e-02  0.2397 0.810594   
## PDRB_M       6.2847e-07  1.4424e-06  0.4357 0.663052   
## IT          -1.2325e-03  4.4722e-04 -2.7560 0.005852 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    27.677
## Residual Sum of Squares: 26.957
## R-Squared:      0.026038
## Adj. R-Squared: 0.016427
## Chisq: 8.1273 on 3 DF, p-value: 0.043453
chow <- pooltest(cem, fem)
print(chow)
## 
##  F statistic
## 
## data:  LPP ~ IPM + PDRB_M + IT
## F = 6.985, df1 = 153, df2 = 151, p-value < 2.2e-16
## alternative hypothesis: unstability
cat("\nKesimpulan Uji Chow:\n")
## 
## Kesimpulan Uji Chow:
if (chow$p.value < 0.05) {
  cat("p-value =", round(chow$p.value, 4), "< 0.05 -> Tolak H0 -> FEM lebih baik dari CEM.\n")
} else {
  cat("p-value =", round(chow$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> CEM lebih baik.\n")
}
## p-value = 0 < 0.05 -> Tolak H0 -> FEM lebih baik dari CEM.
hausman <- phtest(fem, rem)
print(hausman)
## 
##  Hausman Test
## 
## data:  LPP ~ IPM + PDRB_M + IT
## chisq = 45.922, df = 3, p-value = 5.891e-10
## alternative hypothesis: one model is inconsistent
cat("\nKesimpulan Uji Hausman:\n")
## 
## Kesimpulan Uji Hausman:
if (hausman$p.value < 0.05) {
  cat("p-value =", round(hausman$p.value, 4), "< 0.05 -> Tolak H0 -> FEM lebih tepat.\n")
} else {
  cat("p-value =", round(hausman$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> REM lebih efisien.\n")
}
## p-value = 0 < 0.05 -> Tolak H0 -> FEM lebih tepat.
lm_test <- plmtest(cem, type = "bp")
print(lm_test)
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  LPP ~ IPM + PDRB_M + IT
## chisq = 70.384, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
cat("\nKesimpulan Uji LM Breusch-Pagan:\n")
## 
## Kesimpulan Uji LM Breusch-Pagan:
if (lm_test$p.value < 0.05) {
  cat("p-value =", round(lm_test$p.value, 4), "< 0.05 -> Tolak H0 -> REM lebih baik dari CEM.\n")
} else {
  cat("p-value =", round(lm_test$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> CEM cukup memadai.\n")
}
## p-value = 0 < 0.05 -> Tolak H0 -> REM lebih baik dari CEM.
p_chow    <- chow$p.value
p_hausman <- hausman$p.value
p_lm      <- lm_test$p.value

if (p_chow < 0.05 & p_hausman < 0.05) {
  model_terpilih <- fem
  nama_model     <- "Fixed Effect Model (FEM)"
} else if (p_chow < 0.05 & p_hausman >= 0.05) {
  model_terpilih <- rem
  nama_model     <- "Random Effect Model (REM)"
} else if (p_chow >= 0.05 & p_lm < 0.05) {
  model_terpilih <- rem
  nama_model     <- "Random Effect Model (REM)"
} else {
  model_terpilih <- cem
  nama_model     <- "Common Effect Model (CEM)"
}

hasil_pemilihan <- data.frame(
  Uji       = c("Uji Chow (CEM vs FEM)", "Uji Hausman (FEM vs REM)", "Uji LM BP (CEM vs REM)"),
  p_value   = round(c(p_chow, p_hausman, p_lm), 4),
  Keputusan = c(
    ifelse(p_chow    < 0.05, "Tolak H0 -> FEM",  "Gagal Tolak H0 -> CEM"),
    ifelse(p_hausman < 0.05, "Tolak H0 -> FEM",  "Gagal Tolak H0 -> REM"),
    ifelse(p_lm      < 0.05, "Tolak H0 -> REM",  "Gagal Tolak H0 -> CEM")
  )
)

kable(hasil_pemilihan, caption = "Tabel 2. Ringkasan Pemilihan Model") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Tabel 2. Ringkasan Pemilihan Model
Uji p_value Keputusan
Uji Chow (CEM vs FEM) 0 Tolak H0 -> FEM
Uji Hausman (FEM vs REM) 0 Tolak H0 -> FEM
Uji LM BP (CEM vs REM) 0 Tolak H0 -> REM
cat("\nModel Terpilih:", nama_model, "\n")
## 
## Model Terpilih: Fixed Effect Model (FEM)
ols_vif <- lm(LPP ~ IPM + PDRB_M + IT, data = data)
vif_val <- vif(ols_vif)

vif_tabel <- data.frame(
  Variabel = names(vif_val),
  VIF      = round(vif_val, 3),
  Status   = ifelse(vif_val < 5,  "Tidak ada multikolinearitas",
             ifelse(vif_val < 10, "Multikolinearitas sedang",
                                  "Multikolinearitas serius"))
)

kable(vif_tabel, caption = "Tabel 3. Uji Multikolinearitas (VIF)", row.names = FALSE) |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Tabel 3. Uji Multikolinearitas (VIF)
Variabel VIF Status
IPM 1.182 Tidak ada multikolinearitas
PDRB_M 1.179 Tidak ada multikolinearitas
IT 1.042 Tidak ada multikolinearitas
bp_test <- bptest(model_terpilih)
print(bp_test)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_terpilih
## BP = 4.0051, df = 3, p-value = 0.2609
cat("\nKesimpulan:\n")
## 
## Kesimpulan:
if (bp_test$p.value < 0.05) {
  cat("p-value =", round(bp_test$p.value, 4), "< 0.05 -> Ada heteroskedastisitas -> Gunakan robust SE (HC3).\n")
} else {
  cat("p-value =", round(bp_test$p.value, 4), ">= 0.05 -> Tidak ada heteroskedastisitas.\n")
}
## p-value = 0.2609 >= 0.05 -> Tidak ada heteroskedastisitas.
resid_model <- residuals(model_terpilih)

ks_test <- lillie.test(resid_model)
sw_test <- shapiro.test(resid_model)

print(ks_test)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  resid_model
## D = 0.02645, p-value = 0.8652
print(sw_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  resid_model
## W = 0.98447, p-value = 0.002072
cat("\nKesimpulan:\n")
## 
## Kesimpulan:
if (sw_test$p.value < 0.05 | ks_test$p.value < 0.05) {
  cat("Residual tidak normal. Dengan n=308, CLT menjamin inferensi tetap valid.\n")
} else {
  cat("Residual berdistribusi normal.\n")
}
## Residual tidak normal. Dengan n=308, CLT menjamin inferensi tetap valid.
par(mfrow = c(1, 2))

hist(resid_model, breaks = 20, col = "#42A5F5", border = "white",
     main = "Histogram Residual", xlab = "Residual", prob = TRUE)
curve(dnorm(x, mean = mean(resid_model), sd = sd(resid_model)),
      add = TRUE, col = "red", lwd = 2)

qqnorm(resid_model, main = "Q-Q Plot Residual", pch = 20, col = "#42A5F5")
qqline(resid_model, col = "red", lwd = 2)

par(mfrow = c(1, 1))
ggplot(data.frame(fitted = as.numeric(fitted(model_terpilih)),
                  resid  = as.numeric(residuals(model_terpilih))),
       aes(x = fitted, y = resid)) +
  geom_point(color = "#1565C0", alpha = 0.6, size = 2) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed", linewidth = 1) +
  geom_smooth(method = "loess", se = TRUE, color = "#FF6F00", linewidth = 0.8) +
  labs(title = "Plot Residual vs Fitted Values", x = "Fitted Values", y = "Residual") +
  theme_minimal(base_size = 12)
## `geom_smooth()` using formula = 'y ~ x'

bp_pval <- bp_test$p.value

if (bp_pval < 0.05) {
  coef_mat <- coeftest(model_terpilih, vcov = vcovHC(model_terpilih, type = "HC3"))
} else {
  coef_mat <- coeftest(model_terpilih)
}
r2     <- summary(model_terpilih)$r.squared
r2_adj <- summary(model_terpilih)$adj.r.squared
fstat  <- summary(model_terpilih)$fstatistic
r2_cem <- summary(cem)$r.squared[1]
r2_fem <- summary(fem)$r.squared[1]
r2_rem <- summary(rem)$r.squared[1]

perbandingan <- data.frame(
  Model       = c("Common Effect Model (CEM)", "Fixed Effect Model (FEM)", "Random Effect Model (REM)"),
  R_squared   = round(c(r2_cem, r2_fem, r2_rem), 4),
  Uji_Chow    = c("-", paste("p =", round(p_chow, 4)), "-"),
  Uji_Hausman = c("-", "-", paste("p =", round(p_hausman, 4))),
  Uji_LM      = c(paste("p =", round(p_lm, 4)), "-", "-"),
  Terpilih    = c(
    ifelse(nama_model == "Common Effect Model (CEM)", "V", ""),
    ifelse(nama_model == "Fixed Effect Model (FEM)",  "V", ""),
    ifelse(nama_model == "Random Effect Model (REM)", "V", "")
  )
)

kable(perbandingan, caption = "Tabel 6. Perbandingan Ketiga Model") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Tabel 6. Perbandingan Ketiga Model
Model R_squared Uji_Chow Uji_Hausman Uji_LM Terpilih
Common Effect Model (CEM) 0.0326
p = 0
Fixed Effect Model (FEM) 0.2465 p = 0
V
Random Effect Model (REM) 0.0260
p = 0