library(readxl)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(knitr)

#plot
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(ggplot2)

# uji asumsi
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# modelling
library(plm)
## Warning: package 'plm' was built under R version 4.4.3
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

Load Datset

# Load dataset
data_panel <- read_excel("D:/UNY/MySta/SEM 5/Ekonometrika/Data Panel 2020-2024.xlsx")
data_panel
## # A tibble: 190 × 7
##    Provinsi             Tahun  Gini   PDRB Miskin   RLS     UMP
##    <chr>                <dbl> <dbl>  <dbl>  <dbl> <dbl>   <dbl>
##  1 ACEH                  2020 0.323  31633  15.0   9.71 3165031
##  2 SUMATERA UTARA        2020 0.316  54979   8.75  9.83 2499423
##  3 SUMATERA BARAT        2020 0.305  43826   6.28  9.34 2484041
##  4 RIAU                  2020 0.329 114167   6.82  9.47 2888564
##  5 JAMBI                 2020 0.32   57958   7.58  8.97 2630162
##  6 SUMATERA SELATAN      2020 0.339  53843  12.7   8.68 3043111
##  7 BENGKULU              2020 0.334  36552  15.0   9.2  2213604
##  8 LAMPUNG               2020 0.327  39290  12.3   8.51 2432002
##  9 KEP. BANGKA BELITUNG  2020 0.262  52023   4.53  8.49 3230024
## 10 KEP. RIAU             2020 0.339 123465   5.92 10.2  3005460
## # ℹ 180 more rows

Data Exploration

# Jumlah observasi dan variabel
dim(data_panel)
## [1] 190   7
# Lihat struktur data
str(data_panel)
## tibble [190 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi: chr [1:190] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
##  $ Tahun   : num [1:190] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ Gini    : num [1:190] 0.323 0.316 0.305 0.329 0.32 0.339 0.334 0.327 0.262 0.339 ...
##  $ PDRB    : num [1:190] 31633 54979 43826 114167 57958 ...
##  $ Miskin  : num [1:190] 14.99 8.75 6.28 6.82 7.58 ...
##  $ RLS     : num [1:190] 9.71 9.83 9.34 9.47 8.97 ...
##  $ UMP     : num [1:190] 3165031 2499423 2484041 2888564 2630162 ...
# Ringkasan statistik dasar
summary(data_panel)
##    Provinsi             Tahun           Gini             PDRB       
##  Length:190         Min.   :2020   Min.   :0.2360   Min.   : 16870  
##  Class :character   1st Qu.:2021   1st Qu.:0.3160   1st Qu.: 43763  
##  Mode  :character   Median :2022   Median :0.3400   Median : 57294  
##                     Mean   :2022   Mean   :0.3453   Mean   : 74329  
##                     3rd Qu.:2023   3rd Qu.:0.3728   3rd Qu.: 73844  
##                     Max.   :2024   Max.   :0.4490   Max.   :344350  
##                                    NA's   :16       NA's   :12      
##      Miskin            RLS              UMP         
##  Min.   : 3.780   Min.   : 5.100   Min.   :1704608  
##  1st Qu.: 6.320   1st Qu.: 8.580   1st Qu.:2460997  
##  Median : 8.735   Median : 9.250   Median :2812138  
##  Mean   :10.548   Mean   : 9.219   Mean   :2834718  
##  3rd Qu.:12.980   3rd Qu.: 9.820   3rd Qu.:3189000  
##  Max.   :32.970   Max.   :11.490   Max.   :5067381  
##  NA's   :16       NA's   :16       NA's   :16
# Cek missing per kolom
colSums(is.na(data_panel))
## Provinsi    Tahun     Gini     PDRB   Miskin      RLS      UMP 
##        0        0       16       12       16       16       16
# Persentase missing per kolom
sapply(data_panel, function(x) mean(is.na(x))) * 100
## Provinsi    Tahun     Gini     PDRB   Miskin      RLS      UMP 
## 0.000000 0.000000 8.421053 6.315789 8.421053 8.421053 8.421053
# Cek duplikat
dup <- data_panel %>%
  group_by(Provinsi, Tahun) %>%
  filter(n() > 1)

nrow(dup)      
## [1] 0
# Cek Jumlah Observasi Tiap Provinsi dan Tahun
data_panel %>%
  count(Provinsi) %>%
  arrange(desc(n))
## # A tibble: 38 × 2
##    Provinsi          n
##    <chr>         <int>
##  1 ACEH              5
##  2 BALI              5
##  3 BANTEN            5
##  4 BENGKULU          5
##  5 DI YOGYAKARTA     5
##  6 DKI JAKARTA       5
##  7 GORONTALO         5
##  8 JAMBI             5
##  9 JAWA BARAT        5
## 10 JAWA TENGAH       5
## # ℹ 28 more rows
data_panel %>%
  count(Tahun)
## # A tibble: 5 × 2
##   Tahun     n
##   <dbl> <int>
## 1  2020    38
## 2  2021    38
## 3  2022    38
## 4  2023    38
## 5  2024    38
# cek tipe data
sapply(data_panel, class)
##    Provinsi       Tahun        Gini        PDRB      Miskin         RLS 
## "character"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##         UMP 
##   "numeric"
# Cek Outlier
cek_outlier <- function(x) {
  stats <- boxplot.stats(x)
  return(length(stats$out))
}

sapply(data_panel[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)
##   Gini   PDRB Miskin    RLS    UMP 
##      0     26      6      2      4
# Distribusi tiap variabel
data_panel %>%
  pivot_longer(
    cols = c(Gini, PDRB, Miskin, RLS, UMP),
    names_to = "Variabel", 
    values_to = "Nilai"
  ) %>%
  ggplot(aes(x = "", y = Nilai, fill = "skyblue")) +
  geom_boxplot(color = "black", fill = "skyblue") +
  facet_wrap(~Variabel, scales = "free", ncol = 3) +
  labs(
    title = "Distribusi Variabel Ekonomi 2020–2024",
    x = NULL,
    y = "Nilai"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 11),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
## Warning: Removed 76 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Heatmap Rasio Gini Tiap Provinsi & Tahun
ggplot(data_panel, aes(x = factor(Tahun), y = reorder(Provinsi, Gini), fill = Gini)) +
  geom_tile(color = "white", width = 0.9, height = 0.9) +   # kotak lebih kecil, jadi persegi
  scale_fill_gradient(low = "#DEEBF7", high = "#08306B", name = "Gini") +  # biru tua → biru muda
  labs(
    title = "Heatmap Gini Ratio per Provinsi (2020–2024)",
    x = "Tahun",
    y = "Provinsi"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid = element_blank(),
    legend.position = "right"
  )

### Data Preprocessing

# Load dataset
data_panel <- read_excel("D:/UNY/MySta/SEM 5/Ekonometrika/Data Panel 2020-2024.xlsx")
# Handling Missing Value

# Sebelum handling missing value
missing_before <- colSums(is.na(data_panel))

# Handling missing value
data_panel <- data_panel %>% 
  filter(complete.cases(.))

# Sesudah handling missing value
missing_after <- colSums(is.na(data_panel))

#  tabel perbandingan
tabel_missing <- data.frame(
  Variabel = names(missing_before),
  Missing_Sebelum = missing_before,
  Missing_Sesudah = missing_after,
  Selisih = missing_before - missing_after
)

print(tabel_missing)
##          Variabel Missing_Sebelum Missing_Sesudah Selisih
## Provinsi Provinsi               0               0       0
## Tahun       Tahun               0               0       0
## Gini         Gini              16               0      16
## PDRB         PDRB              12               0      12
## Miskin     Miskin              16               0      16
## RLS           RLS              16               0      16
## UMP           UMP              16               0      16
# Handling Ourlier 

#  jumlah outlier sebelum handling 
cek_outlier <- function(x) {
  stats <- boxplot.stats(x)
  return(length(stats$out))
}

before_outlier <- sapply(data_panel[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)

# andling Outlier 
winsorize_iqr <- function(x) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR_value <- Q3 - Q1
  lower <- Q1 - 1.5 * IQR_value
  upper <- Q3 + 1.5 * IQR_value
  x[x < lower] <- lower
  x[x > upper] <- upper
  return(x)
}

# PDRB khusus: log-transform dulu, lalu winsorize_iqr
data_after_outlier <- data_panel %>%
  mutate(
    Gini   = winsorize_iqr(Gini),
    PDRB   = winsorize_iqr(log(PDRB)),  # log transform untuk stabilisasi
    Miskin = winsorize_iqr(Miskin),
    RLS    = winsorize_iqr(RLS),
    UMP    = winsorize_iqr(UMP)
  )

#  jumlah outlier sesudah handling
after_outlier <- sapply(data_after_outlier[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)

# Tabel perbandingan 
tabel_outlier <- data.frame(
  Variabel = names(before_outlier),
  Jumlah_Outlier_Sebelum = before_outlier,
  Jumlah_Outlier_Sesudah = after_outlier
)

tabel_outlier <- tabel_outlier %>%
  mutate(Selisih = Jumlah_Outlier_Sebelum - Jumlah_Outlier_Sesudah)
print(tabel_outlier)
##        Variabel Jumlah_Outlier_Sebelum Jumlah_Outlier_Sesudah Selisih
## Gini       Gini                      0                      0       0
## PDRB       PDRB                     26                      0      26
## Miskin   Miskin                      6                      0       6
## RLS         RLS                      2                      0       2
## UMP         UMP                      4                      0       4
df_cleaned <- data_after_outlier

Uji Asumsi Klasik

# MODEL DASAR OLS untuk cek asumsi
ols_model_pdrb <- lm(PDRB ~ Gini + Miskin + RLS + UMP, data = df_cleaned)

# Simpan residual dan fitted untuk plot selanjutnya
residuals_ols <- residuals(ols_model_pdrb)
fitted_ols <- fitted(ols_model_pdrb)

summary(ols_model_pdrb)
## 
## Call:
## lm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = df_cleaned)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76629 -0.22597 -0.07348  0.15483  0.98890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.773e+00  4.078e-01  21.511  < 2e-16 ***
## Gini         6.823e-01  6.949e-01   0.982 0.327576    
## Miskin      -3.528e-02  6.251e-03  -5.644 6.86e-08 ***
## RLS          1.382e-01  3.742e-02   3.693 0.000299 ***
## UMP          3.840e-07  5.195e-08   7.392 6.39e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3765 on 169 degrees of freedom
## Multiple R-squared:  0.4734, Adjusted R-squared:  0.4609 
## F-statistic: 37.98 on 4 and 169 DF,  p-value: < 2.2e-16

1. Uji Multikolinearitas

library(car)
library(ggcorrplot)
num_vars <- df_cleaned[, c("PDRB", "Gini", "Miskin", "RLS", "UMP")]
corr_matrix <- cor(num_vars, method = "pearson", use = "complete.obs")
print(round(corr_matrix, 3))
##          PDRB   Gini Miskin    RLS    UMP
## PDRB    1.000 -0.076 -0.431  0.468  0.505
## Gini   -0.076  1.000  0.257  0.008 -0.106
## Miskin -0.431  0.257  1.000 -0.339 -0.042
## RLS     0.468  0.008 -0.339  1.000  0.274
## UMP     0.505 -0.106 -0.042  0.274  1.000
# Heatmap korelasi
ggcorrplot(
  corr_matrix,
  method = "square",
  type = "lower",
  lab = TRUE,
  lab_size = 4,
  colors = c("#08306B", "white", "#4292C6"),
  title = "Heatmap Korelasi Pearson antar Variabel (Y = PDRB)",
  ggtheme = ggplot2::theme_minimal()
)

# VIF test
vif_values <- vif(ols_model_pdrb)
vif_df <- data.frame(Variabel = names(vif_values), VIF = as.numeric(vif_values))
library(car)
vif_values <- vif(ols_model_pdrb)
print(vif_values)
##     Gini   Miskin      RLS      UMP 
## 1.101713 1.232709 1.246895 1.104442
ggplot(vif_df, aes(x = reorder(Variabel, VIF), y = VIF, fill = Variabel)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_hline(yintercept = 10, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Uji Multikolinearitas (VIF) — Model PDRB",
       x = "Variabel", y = "Nilai VIF") +
  theme_minimal() +
  theme(legend.position = "none")

#### 2. Uji Homoskedastisitas

library(lmtest)
bp_test_pdrb <- bptest(ols_model_pdrb)
print(bp_test_pdrb)
## 
##  studentized Breusch-Pagan test
## 
## data:  ols_model_pdrb
## BP = 30.11, df = 4, p-value = 4.649e-06
# Residual vs fitted
ggplot(data.frame(Fitted = fitted_ols, Residual = residuals_ols),
       aes(x = Fitted, y = Residual)) +
  geom_point(color = "#238B45", alpha = 0.6) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  labs(title = "Residual vs Fitted (Uji Homoskedastisitas) — Y = PDRB",
       x = "Nilai Fitted", y = "Residual") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

2.1 Handling Asumsi Heteroskedastisitas

# HANDLING HETEROSKEDASTISITAS

# p < 0.05 → heteroskedastisitas, gunakan robust SE:
robust_ols_pdrb <- coeftest(ols_model_pdrb, vcov = vcovHC(ols_model_pdrb, type = "HC1"))
print(robust_ols_pdrb)
## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)  8.7726e+00  3.4886e-01 25.1461 < 2.2e-16 ***
## Gini         6.8225e-01  6.6741e-01  1.0222  0.308130    
## Miskin      -3.5281e-02  7.1749e-03 -4.9173 2.066e-06 ***
## RLS          1.3819e-01  4.6210e-02  2.9905  0.003202 ** 
## UMP          3.8401e-07  5.5079e-08  6.9719 6.719e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3. Uji Autokorelasi

dw_test_pdrb <- dwtest(ols_model_pdrb)
print(dw_test_pdrb)
## 
##  Durbin-Watson test
## 
## data:  ols_model_pdrb
## DW = 2.1182, p-value = 0.7546
## alternative hypothesis: true autocorrelation is greater than 0

Autokorelasi di OLS Saat Ini → Tidak Masalah Besar

karena model panel nantinya secara alami mengakomodasi struktur error antar waktu dan individu.

Dengan kata lain: kamu tidak perlu melakukan transformasi Prais–Winsten atau Cochrane–Orcutt sekarang.

4. Normalitas Residual

shapiro_result_pdrb <- shapiro.test(residuals_ols)
print(shapiro_result_pdrb)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals_ols
## W = 0.94557, p-value = 3.259e-06
# Histogram dan QQ plot
ggplot(data.frame(Residual = residuals_ols), aes(x = Residual)) +
  geom_histogram(aes(y = ..density..), fill = "skyblue3", color = "white", bins = 20) +
  geom_density(color = "red", size = 1) +
  labs(title = "Distribusi Residual (Y = PDRB)") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

qqnorm(residuals_ols, col = "steelblue")
qqline(residuals_ols, col = "red", lwd = 2)

Menurut Gujarati (2003, 2011) dan Wooldridge (2016):

Normalitas residual bukan asumsi utama BLUE (Best Linear Unbiased Estimator).

Asumsi normalitas hanya diperlukan jika kamu ingin:

Melakukan inferensi statistik dengan n kecil, atau

Menilai akurasi prediksi probabilistik (misal confidence interval).

Karena datamu adalah panel data (gabungan cross-section × time series), biasanya ukuran sampel cukup besar → pelanggaran normalitas tidak terlalu serius. Model panel nanti juga memperbaiki struktur error antar waktu & individu, sehingga residualnya bisa lebih mendekati normal.

Analisis Regresi Data Panel

library(plm)
library(lmtest)
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(ggplot2)
library(dplyr)
library(car)

# struktur panel
pdata <- pdata.frame(df_cleaned, index = c("Provinsi", "Tahun"))
# PEMBUATAN MODEL REGRESI PANEL

# Common Effect Model (CEM)
cem <- plm(PDRB ~ Gini + Miskin + RLS + UMP, data = pdata, model = "pooling")

# Fixed Effect Model (FEM) - two-ways
fem <- plm(PDRB ~ Gini + Miskin + RLS + UMP,
           data = pdata, model = "within", effect = "twoways")

# Random Effect Model (REM) - Amemiya
rem <- plm(PDRB ~ Gini + Miskin + RLS + UMP,
           data = pdata, model = "random", effect = "twoways", random.method = "amemiya")

# Estimasi
summary(cem)
## Pooling Model
## 
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata, 
##     model = "pooling")
## 
## Unbalanced Panel: n = 38, T = 1-5, N = 174
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.766287 -0.225966 -0.073478  0.154831  0.988896 
## 
## Coefficients:
##                Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept)  8.7726e+00  4.0781e-01 21.5114 < 2.2e-16 ***
## Gini         6.8225e-01  6.9486e-01  0.9819  0.327576    
## Miskin      -3.5281e-02  6.2505e-03 -5.6445 6.864e-08 ***
## RLS          1.3819e-01  3.7420e-02  3.6929  0.000299 ***
## UMP          3.8401e-07  5.1950e-08  7.3918 6.390e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    45.486
## Residual Sum of Squares: 23.953
## R-Squared:      0.4734
## Adj. R-Squared: 0.46093
## F-statistic: 37.9811 on 4 and 169 DF, p-value: < 2.22e-16
summary(fem)
## Twoways effects Within Model
## 
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 38, T = 1-5, N = 174
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.2304267 -0.0189146 -0.0022826  0.0204327  0.2524991 
## 
## Coefficients:
##           Estimate  Std. Error t-value  Pr(>|t|)    
## Gini    7.3041e-02  6.5892e-01  0.1109    0.9119    
## Miskin -1.1522e-02  1.7816e-02 -0.6467    0.5190    
## RLS    -1.3938e-02  3.4165e-02 -0.4080    0.6840    
## UMP     5.6548e-07  1.3148e-07  4.3009 3.345e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.60551
## Residual Sum of Squares: 0.51934
## R-Squared:      0.14231
## Adj. R-Squared: -0.15922
## F-statistic: 5.30959 on 4 and 128 DF, p-value: 0.00054671
summary(rem)
## Twoways effects Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata, 
##     effect = "twoways", model = "random", random.method = "amemiya")
## 
## Unbalanced Panel: n = 38, T = 1-5, N = 174
## 
## Effects:
##                    var  std.dev share
## idiosyncratic 0.004057 0.063697 0.023
## individual    0.166341 0.407849 0.962
## time          0.002579 0.050788 0.015
## theta:
##            Min.   1st Qu.    Median      Mean   3rd Qu.      Max.
## id    0.8456916 0.9303244 0.9303244 0.9283788 0.9303244 0.9303244
## time  0.7897202 0.7897202 0.7897202 0.7921030 0.7897202 0.8006307
## total 0.7692689 0.7863292 0.7863292 0.7879972 0.7863292 0.7968724
## 
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.2731 -0.2614 -0.0332  0.0336  0.1961  0.9487 
## 
## Coefficients:
##                Estimate  Std. Error z-value Pr(>|z|)
## (Intercept)  1.0127e+01  6.8441e+00  1.4796   0.1390
## Gini         2.0916e-01  9.5307e+00  0.0219   0.9825
## Miskin      -3.5472e-02  1.6575e-01 -0.2140   0.8305
## RLS         -2.6507e-02  4.2009e-01 -0.0631   0.9497
## UMP          4.8955e-07  1.2212e-06  0.4009   0.6885
## 
## Total Sum of Squares:    45.486
## Residual Sum of Squares: 27.515
## R-Squared:      0.39962
## Adj. R-Squared: 0.38541
## Chisq: 0.222023 on 4 DF, p-value: 0.99428
# PEMILIHAN MODEL

# Chow test → pooled vs fixed
pFtest(fem, cem)
## 
##  F test for twoways effects
## 
## data:  PDRB ~ Gini + Miskin + RLS + UMP
## F = 140.87, df1 = 41, df2 = 128, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Hausman test → fixed vs random
phtest(fem, rem)
## 
##  Hausman Test
## 
## data:  PDRB ~ Gini + Miskin + RLS + UMP
## chisq = 0.03228, df = 4, p-value = 0.9999
## alternative hypothesis: one model is inconsistent
# LM test → pooled vs random
plmtest(cem, type = "bp")
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  PDRB ~ Gini + Miskin + RLS + UMP
## chisq = 265.92, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects

Model terbaik untuk menjelaskan PDRB adalah Random Effect Model (REM).

library(ggplot2)
df_cleaned$fitted_REM <- fitted(rem)
ggplot(df_cleaned, aes(x = PDRB, y = fitted_REM)) +
  geom_point(color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  labs(title = "Fitted vs Actual (REM Model)",
       x = "Nilai Aktual ln(PDRB)",
       y = "Nilai Prediksi (Fitted)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Model REM melanggar tiga asumsi utama → terdapat heteroskedastisitas, autokorelasi, dan cross-section dependence. Namun, ini biasa terjadi pada data panel ekonomi makro antar provinsi seperti PDRB.