NSC

library(readxl)

## Warning: package 'readxl' was built under R version 4.5.2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.5.3

library(plm)

## Warning: package 'plm' was built under R version 4.5.3

## 
## Attaching package: 'plm'

## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(car)

## Warning: package 'car' was built under R version 4.5.2

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(psych)

## Warning: package 'psych' was built under R version 4.5.3

## 
## Attaching package: 'psych'

## The following object is masked from 'package:car':
## 
##     logit

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(knitr)

## Warning: package 'knitr' was built under R version 4.5.2

#install.packages("kableExtra")
library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.5.3

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(ggcorrplot)

## Warning: package 'ggcorrplot' was built under R version 4.5.3

library(nortest)
library(sandwich)

## Warning: package 'sandwich' was built under R version 4.5.3

data <- read_xlsx("Data_Panel_Pertumbuhan_Penduduk_Sumatera.xlsx", sheet = "Data Panel")

data$Tahun    <- as.factor(data$Tahun)
data$Provinsi <- as.factor(data$Provinsi)
data$Daerah   <- as.factor(data$Daerah)
data$PDRB_M   <- data$PDRB / 1000

cat("Jumlah observasi   :", nrow(data), "\n")

## Jumlah observasi   : 308

cat("Jumlah kab/kota    :", n_distinct(data$Daerah), "\n")

## Jumlah kab/kota    : 154

cat("Jumlah tahun       :", n_distinct(data$Tahun), "\n")

## Jumlah tahun       : 2

cat("Missing value      :", sum(is.na(data)), "\n")

## Missing value      : 0

vars <- c("LPP", "IPM", "PDRB_M", "IT")

desc <- data[, vars] |>
  describe() |>
  select(n, mean, sd, min, max, skew, kurtosis) |>
  round(3)

rownames(desc) <- c("LPP (%)", "IPM (Indeks)", "PDRB (Miliar Rp)", "IT (Jumlah Desa)")

kable(desc, caption = "Tabel 1. Statistik Deskriptif",
      col.names = c("N", "Mean", "Std.Dev", "Min", "Max", "Skewness", "Kurtosis")) |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Tabel 1. Statistik Deskriptif
	N	Mean	Std.Dev	Min	Max	Skewness	Kurtosis
LPP (%)	308	1.390	0.565	-0.100	4.18	0.643	1.595
IPM (Indeks)	308	70.816	4.522	61.090	85.71	0.657	0.349
PDRB (Miliar Rp)	308	23004.069	33080.753	1211.352	254721.32	3.702	17.187
IT (Jumlah Desa)	308	123.435	98.436	7.000	678.00	2.112	6.483

ggplot(data, aes(x = LPP, fill = Tahun)) +
  geom_histogram(bins = 20, color = "white", alpha = 0.8, position = "dodge") +
  scale_fill_manual(values = c("#2196F3", "#FF9800")) +
  labs(title = "Distribusi Laju Pertumbuhan Penduduk",
       x = "LPP (%)", y = "Frekuensi", fill = "Tahun") +
  theme_minimal(base_size = 12)

ggplot(data, aes(x = reorder(Provinsi, LPP, median), y = LPP, fill = Provinsi)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21, outlier.color = "red") +
  coord_flip() +
  labs(title = "Sebaran LPP per Provinsi (2020–2021)", x = "Provinsi", y = "LPP (%)") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "none")

cor_matrix <- cor(data[, vars], use = "complete.obs")

ggcorrplot(cor_matrix, method = "circle", type = "lower", lab = TRUE, lab_size = 4,
           colors = c("#D32F2F", "white", "#1976D2"),
           title = "Matriks Korelasi Antar Variabel",
           ggtheme = theme_minimal())

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
##   Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

pdata <- pdata.frame(data, index = c("Daerah", "Tahun"))
pdim(pdata)

## Balanced Panel: n = 154, T = 2, N = 308

cem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "pooling")
summary(cem)

## Pooling Model
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, model = "pooling")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -1.4583e+00 -3.5811e-01 -7.7479e-05  2.8045e-01  2.8284e+00 
## 
## Coefficients:
##                Estimate  Std. Error t-value Pr(>|t|)   
## (Intercept)  8.9662e-01  5.4314e-01  1.6508 0.099809 . 
## IPM          8.5313e-03  7.6619e-03  1.1135 0.266385   
## PDRB_M       9.5793e-08  1.0462e-06  0.0916 0.927107   
## IT          -9.1208e-04  3.3051e-04 -2.7596 0.006138 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    98.013
## Residual Sum of Squares: 94.819
## R-Squared:      0.032586
## Adj. R-Squared: 0.02304
## F-statistic: 3.41332 on 3 and 304 DF, p-value: 0.017835

fem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "within", effect = "individual")
summary(fem)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, effect = "individual", 
##     model = "within")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -8.8516e-01 -1.3272e-01  5.1083e-15  1.3272e-01  8.8516e-01 
## 
## Coefficients:
##           Estimate  Std. Error t-value  Pr(>|t|)    
## IPM    -7.1721e-01  1.2625e-01 -5.6811 6.683e-08 ***
## PDRB_M  3.3863e-05  9.2007e-06  3.6804 0.0003235 ***
## IT     -3.2999e-03  1.8746e-03 -1.7604 0.0803682 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    15.578
## Residual Sum of Squares: 11.739
## R-Squared:      0.24645
## Adj. R-Squared: -0.53205
## F-statistic: 16.4619 on 3 and 151 DF, p-value: 2.6285e-09

rem <- plm(LPP ~ IPM + PDRB_M + IT, data = pdata, model = "random", effect = "individual")
summary(rem)

## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = LPP ~ IPM + PDRB_M + IT, data = pdata, effect = "individual", 
##     model = "random")
## 
## Balanced Panel: n = 154, T = 2, N = 308
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.07774 0.27882 0.256
## individual    0.22595 0.47534 0.744
## theta: 0.6169
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.8120291 -0.1916862 -0.0096102  0.1455514  1.6527822 
## 
## Coefficients:
##                Estimate  Std. Error z-value Pr(>|z|)   
## (Intercept)  1.3480e+00  7.5151e-01  1.7937 0.072867 . 
## IPM          2.5433e-03  1.0612e-02  0.2397 0.810594   
## PDRB_M       6.2847e-07  1.4424e-06  0.4357 0.663052   
## IT          -1.2325e-03  4.4722e-04 -2.7560 0.005852 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    27.677
## Residual Sum of Squares: 26.957
## R-Squared:      0.026038
## Adj. R-Squared: 0.016427
## Chisq: 8.1273 on 3 DF, p-value: 0.043453

chow <- pooltest(cem, fem)
print(chow)

## 
##  F statistic
## 
## data:  LPP ~ IPM + PDRB_M + IT
## F = 6.985, df1 = 153, df2 = 151, p-value < 2.2e-16
## alternative hypothesis: unstability

cat("\nKesimpulan Uji Chow:\n")

## 
## Kesimpulan Uji Chow:

if (chow$p.value < 0.05) {
  cat("p-value =", round(chow$p.value, 4), "< 0.05 -> Tolak H0 -> FEM lebih baik dari CEM.\n")
} else {
  cat("p-value =", round(chow$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> CEM lebih baik.\n")
}

## p-value = 0 < 0.05 -> Tolak H0 -> FEM lebih baik dari CEM.

hausman <- phtest(fem, rem)
print(hausman)

## 
##  Hausman Test
## 
## data:  LPP ~ IPM + PDRB_M + IT
## chisq = 45.922, df = 3, p-value = 5.891e-10
## alternative hypothesis: one model is inconsistent

cat("\nKesimpulan Uji Hausman:\n")

## 
## Kesimpulan Uji Hausman:

if (hausman$p.value < 0.05) {
  cat("p-value =", round(hausman$p.value, 4), "< 0.05 -> Tolak H0 -> FEM lebih tepat.\n")
} else {
  cat("p-value =", round(hausman$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> REM lebih efisien.\n")
}

## p-value = 0 < 0.05 -> Tolak H0 -> FEM lebih tepat.

lm_test <- plmtest(cem, type = "bp")
print(lm_test)

## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  LPP ~ IPM + PDRB_M + IT
## chisq = 70.384, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

cat("\nKesimpulan Uji LM Breusch-Pagan:\n")

## 
## Kesimpulan Uji LM Breusch-Pagan:

if (lm_test$p.value < 0.05) {
  cat("p-value =", round(lm_test$p.value, 4), "< 0.05 -> Tolak H0 -> REM lebih baik dari CEM.\n")
} else {
  cat("p-value =", round(lm_test$p.value, 4), ">= 0.05 -> Gagal Tolak H0 -> CEM cukup memadai.\n")
}

## p-value = 0 < 0.05 -> Tolak H0 -> REM lebih baik dari CEM.

p_chow    <- chow$p.value
p_hausman <- hausman$p.value
p_lm      <- lm_test$p.value

if (p_chow < 0.05 & p_hausman < 0.05) {
  model_terpilih <- fem
  nama_model     <- "Fixed Effect Model (FEM)"
} else if (p_chow < 0.05 & p_hausman >= 0.05) {
  model_terpilih <- rem
  nama_model     <- "Random Effect Model (REM)"
} else if (p_chow >= 0.05 & p_lm < 0.05) {
  model_terpilih <- rem
  nama_model     <- "Random Effect Model (REM)"
} else {
  model_terpilih <- cem
  nama_model     <- "Common Effect Model (CEM)"
}

hasil_pemilihan <- data.frame(
  Uji       = c("Uji Chow (CEM vs FEM)", "Uji Hausman (FEM vs REM)", "Uji LM BP (CEM vs REM)"),
  p_value   = round(c(p_chow, p_hausman, p_lm), 4),
  Keputusan = c(
    ifelse(p_chow    < 0.05, "Tolak H0 -> FEM",  "Gagal Tolak H0 -> CEM"),
    ifelse(p_hausman < 0.05, "Tolak H0 -> FEM",  "Gagal Tolak H0 -> REM"),
    ifelse(p_lm      < 0.05, "Tolak H0 -> REM",  "Gagal Tolak H0 -> CEM")
  )
)

kable(hasil_pemilihan, caption = "Tabel 2. Ringkasan Pemilihan Model") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Tabel 2. Ringkasan Pemilihan Model
Uji	p_value	Keputusan
Uji Chow (CEM vs FEM)	0	Tolak H0 -> FEM
Uji Hausman (FEM vs REM)	0	Tolak H0 -> FEM
Uji LM BP (CEM vs REM)	0	Tolak H0 -> REM

cat("\nModel Terpilih:", nama_model, "\n")

## 
## Model Terpilih: Fixed Effect Model (FEM)

ols_vif <- lm(LPP ~ IPM + PDRB_M + IT, data = data)
vif_val <- vif(ols_vif)

vif_tabel <- data.frame(
  Variabel = names(vif_val),
  VIF      = round(vif_val, 3),
  Status   = ifelse(vif_val < 5,  "Tidak ada multikolinearitas",
             ifelse(vif_val < 10, "Multikolinearitas sedang",
                                  "Multikolinearitas serius"))
)

kable(vif_tabel, caption = "Tabel 3. Uji Multikolinearitas (VIF)", row.names = FALSE) |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Tabel 3. Uji Multikolinearitas (VIF)
Variabel	VIF	Status
IPM	1.182	Tidak ada multikolinearitas
PDRB_M	1.179	Tidak ada multikolinearitas
IT	1.042	Tidak ada multikolinearitas

bp_test <- bptest(model_terpilih)
print(bp_test)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_terpilih
## BP = 4.0051, df = 3, p-value = 0.2609

cat("\nKesimpulan:\n")

## 
## Kesimpulan:

if (bp_test$p.value < 0.05) {
  cat("p-value =", round(bp_test$p.value, 4), "< 0.05 -> Ada heteroskedastisitas -> Gunakan robust SE (HC3).\n")
} else {
  cat("p-value =", round(bp_test$p.value, 4), ">= 0.05 -> Tidak ada heteroskedastisitas.\n")
}

## p-value = 0.2609 >= 0.05 -> Tidak ada heteroskedastisitas.

resid_model <- residuals(model_terpilih)

ks_test <- lillie.test(resid_model)
sw_test <- shapiro.test(resid_model)

print(ks_test)

## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  resid_model
## D = 0.02645, p-value = 0.8652

print(sw_test)

## 
##  Shapiro-Wilk normality test
## 
## data:  resid_model
## W = 0.98447, p-value = 0.002072

cat("\nKesimpulan:\n")

## 
## Kesimpulan:

if (sw_test$p.value < 0.05 | ks_test$p.value < 0.05) {
  cat("Residual tidak normal. Dengan n=308, CLT menjamin inferensi tetap valid.\n")
} else {
  cat("Residual berdistribusi normal.\n")
}

## Residual tidak normal. Dengan n=308, CLT menjamin inferensi tetap valid.

par(mfrow = c(1, 2))

hist(resid_model, breaks = 20, col = "#42A5F5", border = "white",
     main = "Histogram Residual", xlab = "Residual", prob = TRUE)
curve(dnorm(x, mean = mean(resid_model), sd = sd(resid_model)),
      add = TRUE, col = "red", lwd = 2)

qqnorm(resid_model, main = "Q-Q Plot Residual", pch = 20, col = "#42A5F5")
qqline(resid_model, col = "red", lwd = 2)

par(mfrow = c(1, 1))

ggplot(data.frame(fitted = as.numeric(fitted(model_terpilih)),
                  resid  = as.numeric(residuals(model_terpilih))),
       aes(x = fitted, y = resid)) +
  geom_point(color = "#1565C0", alpha = 0.6, size = 2) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed", linewidth = 1) +
  geom_smooth(method = "loess", se = TRUE, color = "#FF6F00", linewidth = 0.8) +
  labs(title = "Plot Residual vs Fitted Values", x = "Fitted Values", y = "Residual") +
  theme_minimal(base_size = 12)

## `geom_smooth()` using formula = 'y ~ x'

bp_pval <- bp_test$p.value

if (bp_pval < 0.05) {
  coef_mat <- coeftest(model_terpilih, vcov = vcovHC(model_terpilih, type = "HC3"))
} else {
  coef_mat <- coeftest(model_terpilih)
}

r2     <- summary(model_terpilih)$r.squared
r2_adj <- summary(model_terpilih)$adj.r.squared
fstat  <- summary(model_terpilih)$fstatistic

r2_cem <- summary(cem)$r.squared[1]
r2_fem <- summary(fem)$r.squared[1]
r2_rem <- summary(rem)$r.squared[1]

perbandingan <- data.frame(
  Model       = c("Common Effect Model (CEM)", "Fixed Effect Model (FEM)", "Random Effect Model (REM)"),
  R_squared   = round(c(r2_cem, r2_fem, r2_rem), 4),
  Uji_Chow    = c("-", paste("p =", round(p_chow, 4)), "-"),
  Uji_Hausman = c("-", "-", paste("p =", round(p_hausman, 4))),
  Uji_LM      = c(paste("p =", round(p_lm, 4)), "-", "-"),
  Terpilih    = c(
    ifelse(nama_model == "Common Effect Model (CEM)", "V", ""),
    ifelse(nama_model == "Fixed Effect Model (FEM)",  "V", ""),
    ifelse(nama_model == "Random Effect Model (REM)", "V", "")
  )
)

kable(perbandingan, caption = "Tabel 6. Perbandingan Ketiga Model") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Tabel 6. Perbandingan Ketiga Model
Model	R_squared	Uji_Chow	Uji_Hausman	Uji_LM	Terpilih
Common Effect Model (CEM)	0.0326			p = 0
Fixed Effect Model (FEM)	0.2465	p = 0			V
Random Effect Model (REM)	0.0260		p = 0

NSC

Muhammad Khayruhanif