library(readxl)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(knitr)
#plot
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.3
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(ggplot2)
# uji asumsi
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# modelling
library(plm)
## Warning: package 'plm' was built under R version 4.4.3
##
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
# Load dataset
data_panel <- read_excel("D:/UNY/MySta/SEM 5/Ekonometrika/Data Panel 2020-2024.xlsx")
data_panel
## # A tibble: 190 × 7
## Provinsi Tahun Gini PDRB Miskin RLS UMP
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 2020 0.323 31633 15.0 9.71 3165031
## 2 SUMATERA UTARA 2020 0.316 54979 8.75 9.83 2499423
## 3 SUMATERA BARAT 2020 0.305 43826 6.28 9.34 2484041
## 4 RIAU 2020 0.329 114167 6.82 9.47 2888564
## 5 JAMBI 2020 0.32 57958 7.58 8.97 2630162
## 6 SUMATERA SELATAN 2020 0.339 53843 12.7 8.68 3043111
## 7 BENGKULU 2020 0.334 36552 15.0 9.2 2213604
## 8 LAMPUNG 2020 0.327 39290 12.3 8.51 2432002
## 9 KEP. BANGKA BELITUNG 2020 0.262 52023 4.53 8.49 3230024
## 10 KEP. RIAU 2020 0.339 123465 5.92 10.2 3005460
## # ℹ 180 more rows
# Jumlah observasi dan variabel
dim(data_panel)
## [1] 190 7
# Lihat struktur data
str(data_panel)
## tibble [190 × 7] (S3: tbl_df/tbl/data.frame)
## $ Provinsi: chr [1:190] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
## $ Tahun : num [1:190] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ Gini : num [1:190] 0.323 0.316 0.305 0.329 0.32 0.339 0.334 0.327 0.262 0.339 ...
## $ PDRB : num [1:190] 31633 54979 43826 114167 57958 ...
## $ Miskin : num [1:190] 14.99 8.75 6.28 6.82 7.58 ...
## $ RLS : num [1:190] 9.71 9.83 9.34 9.47 8.97 ...
## $ UMP : num [1:190] 3165031 2499423 2484041 2888564 2630162 ...
# Ringkasan statistik dasar
summary(data_panel)
## Provinsi Tahun Gini PDRB
## Length:190 Min. :2020 Min. :0.2360 Min. : 16870
## Class :character 1st Qu.:2021 1st Qu.:0.3160 1st Qu.: 43763
## Mode :character Median :2022 Median :0.3400 Median : 57294
## Mean :2022 Mean :0.3453 Mean : 74329
## 3rd Qu.:2023 3rd Qu.:0.3728 3rd Qu.: 73844
## Max. :2024 Max. :0.4490 Max. :344350
## NA's :16 NA's :12
## Miskin RLS UMP
## Min. : 3.780 Min. : 5.100 Min. :1704608
## 1st Qu.: 6.320 1st Qu.: 8.580 1st Qu.:2460997
## Median : 8.735 Median : 9.250 Median :2812138
## Mean :10.548 Mean : 9.219 Mean :2834718
## 3rd Qu.:12.980 3rd Qu.: 9.820 3rd Qu.:3189000
## Max. :32.970 Max. :11.490 Max. :5067381
## NA's :16 NA's :16 NA's :16
# Cek missing per kolom
colSums(is.na(data_panel))
## Provinsi Tahun Gini PDRB Miskin RLS UMP
## 0 0 16 12 16 16 16
# Persentase missing per kolom
sapply(data_panel, function(x) mean(is.na(x))) * 100
## Provinsi Tahun Gini PDRB Miskin RLS UMP
## 0.000000 0.000000 8.421053 6.315789 8.421053 8.421053 8.421053
# Cek duplikat
dup <- data_panel %>%
group_by(Provinsi, Tahun) %>%
filter(n() > 1)
nrow(dup)
## [1] 0
# Cek Jumlah Observasi Tiap Provinsi dan Tahun
data_panel %>%
count(Provinsi) %>%
arrange(desc(n))
## # A tibble: 38 × 2
## Provinsi n
## <chr> <int>
## 1 ACEH 5
## 2 BALI 5
## 3 BANTEN 5
## 4 BENGKULU 5
## 5 DI YOGYAKARTA 5
## 6 DKI JAKARTA 5
## 7 GORONTALO 5
## 8 JAMBI 5
## 9 JAWA BARAT 5
## 10 JAWA TENGAH 5
## # ℹ 28 more rows
data_panel %>%
count(Tahun)
## # A tibble: 5 × 2
## Tahun n
## <dbl> <int>
## 1 2020 38
## 2 2021 38
## 3 2022 38
## 4 2023 38
## 5 2024 38
# cek tipe data
sapply(data_panel, class)
## Provinsi Tahun Gini PDRB Miskin RLS
## "character" "numeric" "numeric" "numeric" "numeric" "numeric"
## UMP
## "numeric"
# Cek Outlier
cek_outlier <- function(x) {
stats <- boxplot.stats(x)
return(length(stats$out))
}
sapply(data_panel[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)
## Gini PDRB Miskin RLS UMP
## 0 26 6 2 4
# Distribusi tiap variabel
data_panel %>%
pivot_longer(
cols = c(Gini, PDRB, Miskin, RLS, UMP),
names_to = "Variabel",
values_to = "Nilai"
) %>%
ggplot(aes(x = "", y = Nilai, fill = "skyblue")) +
geom_boxplot(color = "black", fill = "skyblue") +
facet_wrap(~Variabel, scales = "free", ncol = 3) +
labs(
title = "Distribusi Variabel Ekonomi 2020–2024",
x = NULL,
y = "Nilai"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "none",
strip.text = element_text(face = "bold", size = 11),
plot.title = element_text(face = "bold", hjust = 0.5)
)
## Warning: Removed 76 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Heatmap Rasio Gini Tiap Provinsi & Tahun
ggplot(data_panel, aes(x = factor(Tahun), y = reorder(Provinsi, Gini), fill = Gini)) +
geom_tile(color = "white", width = 0.9, height = 0.9) + # kotak lebih kecil, jadi persegi
scale_fill_gradient(low = "#DEEBF7", high = "#08306B", name = "Gini") + # biru tua → biru muda
labs(
title = "Heatmap Gini Ratio per Provinsi (2020–2024)",
x = "Tahun",
y = "Provinsi"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.y = element_text(size = 8),
axis.text.x = element_text(size = 10),
plot.title = element_text(hjust = 0.5, face = "bold"),
panel.grid = element_blank(),
legend.position = "right"
)
### Data Preprocessing
# Load dataset
data_panel <- read_excel("D:/UNY/MySta/SEM 5/Ekonometrika/Data Panel 2020-2024.xlsx")
# Handling Missing Value
# Sebelum handling missing value
missing_before <- colSums(is.na(data_panel))
# Handling missing value
data_panel <- data_panel %>%
filter(complete.cases(.))
# Sesudah handling missing value
missing_after <- colSums(is.na(data_panel))
# tabel perbandingan
tabel_missing <- data.frame(
Variabel = names(missing_before),
Missing_Sebelum = missing_before,
Missing_Sesudah = missing_after,
Selisih = missing_before - missing_after
)
print(tabel_missing)
## Variabel Missing_Sebelum Missing_Sesudah Selisih
## Provinsi Provinsi 0 0 0
## Tahun Tahun 0 0 0
## Gini Gini 16 0 16
## PDRB PDRB 12 0 12
## Miskin Miskin 16 0 16
## RLS RLS 16 0 16
## UMP UMP 16 0 16
# Handling Ourlier
# jumlah outlier sebelum handling
cek_outlier <- function(x) {
stats <- boxplot.stats(x)
return(length(stats$out))
}
before_outlier <- sapply(data_panel[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)
# andling Outlier
winsorize_iqr <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_value <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_value
upper <- Q3 + 1.5 * IQR_value
x[x < lower] <- lower
x[x > upper] <- upper
return(x)
}
# PDRB khusus: log-transform dulu, lalu winsorize_iqr
data_after_outlier <- data_panel %>%
mutate(
Gini = winsorize_iqr(Gini),
PDRB = winsorize_iqr(log(PDRB)), # log transform untuk stabilisasi
Miskin = winsorize_iqr(Miskin),
RLS = winsorize_iqr(RLS),
UMP = winsorize_iqr(UMP)
)
# jumlah outlier sesudah handling
after_outlier <- sapply(data_after_outlier[, c("Gini", "PDRB", "Miskin", "RLS", "UMP")], cek_outlier)
# Tabel perbandingan
tabel_outlier <- data.frame(
Variabel = names(before_outlier),
Jumlah_Outlier_Sebelum = before_outlier,
Jumlah_Outlier_Sesudah = after_outlier
)
tabel_outlier <- tabel_outlier %>%
mutate(Selisih = Jumlah_Outlier_Sebelum - Jumlah_Outlier_Sesudah)
print(tabel_outlier)
## Variabel Jumlah_Outlier_Sebelum Jumlah_Outlier_Sesudah Selisih
## Gini Gini 0 0 0
## PDRB PDRB 26 0 26
## Miskin Miskin 6 0 6
## RLS RLS 2 0 2
## UMP UMP 4 0 4
df_cleaned <- data_after_outlier
# MODEL DASAR OLS untuk cek asumsi
ols_model_pdrb <- lm(PDRB ~ Gini + Miskin + RLS + UMP, data = df_cleaned)
# Simpan residual dan fitted untuk plot selanjutnya
residuals_ols <- residuals(ols_model_pdrb)
fitted_ols <- fitted(ols_model_pdrb)
summary(ols_model_pdrb)
##
## Call:
## lm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = df_cleaned)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76629 -0.22597 -0.07348 0.15483 0.98890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.773e+00 4.078e-01 21.511 < 2e-16 ***
## Gini 6.823e-01 6.949e-01 0.982 0.327576
## Miskin -3.528e-02 6.251e-03 -5.644 6.86e-08 ***
## RLS 1.382e-01 3.742e-02 3.693 0.000299 ***
## UMP 3.840e-07 5.195e-08 7.392 6.39e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3765 on 169 degrees of freedom
## Multiple R-squared: 0.4734, Adjusted R-squared: 0.4609
## F-statistic: 37.98 on 4 and 169 DF, p-value: < 2.2e-16
library(car)
library(ggcorrplot)
num_vars <- df_cleaned[, c("PDRB", "Gini", "Miskin", "RLS", "UMP")]
corr_matrix <- cor(num_vars, method = "pearson", use = "complete.obs")
print(round(corr_matrix, 3))
## PDRB Gini Miskin RLS UMP
## PDRB 1.000 -0.076 -0.431 0.468 0.505
## Gini -0.076 1.000 0.257 0.008 -0.106
## Miskin -0.431 0.257 1.000 -0.339 -0.042
## RLS 0.468 0.008 -0.339 1.000 0.274
## UMP 0.505 -0.106 -0.042 0.274 1.000
# Heatmap korelasi
ggcorrplot(
corr_matrix,
method = "square",
type = "lower",
lab = TRUE,
lab_size = 4,
colors = c("#08306B", "white", "#4292C6"),
title = "Heatmap Korelasi Pearson antar Variabel (Y = PDRB)",
ggtheme = ggplot2::theme_minimal()
)
# VIF test
vif_values <- vif(ols_model_pdrb)
vif_df <- data.frame(Variabel = names(vif_values), VIF = as.numeric(vif_values))
library(car)
vif_values <- vif(ols_model_pdrb)
print(vif_values)
## Gini Miskin RLS UMP
## 1.101713 1.232709 1.246895 1.104442
ggplot(vif_df, aes(x = reorder(Variabel, VIF), y = VIF, fill = Variabel)) +
geom_bar(stat = "identity", width = 0.7) +
geom_hline(yintercept = 10, linetype = "dashed", color = "red") +
coord_flip() +
labs(title = "Uji Multikolinearitas (VIF) — Model PDRB",
x = "Variabel", y = "Nilai VIF") +
theme_minimal() +
theme(legend.position = "none")
#### 2. Uji Homoskedastisitas
library(lmtest)
bp_test_pdrb <- bptest(ols_model_pdrb)
print(bp_test_pdrb)
##
## studentized Breusch-Pagan test
##
## data: ols_model_pdrb
## BP = 30.11, df = 4, p-value = 4.649e-06
# Residual vs fitted
ggplot(data.frame(Fitted = fitted_ols, Residual = residuals_ols),
aes(x = Fitted, y = Residual)) +
geom_point(color = "#238B45", alpha = 0.6) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
geom_smooth(method = "loess", se = FALSE, color = "blue") +
labs(title = "Residual vs Fitted (Uji Homoskedastisitas) — Y = PDRB",
x = "Nilai Fitted", y = "Residual") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# HANDLING HETEROSKEDASTISITAS
# p < 0.05 → heteroskedastisitas, gunakan robust SE:
robust_ols_pdrb <- coeftest(ols_model_pdrb, vcov = vcovHC(ols_model_pdrb, type = "HC1"))
print(robust_ols_pdrb)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.7726e+00 3.4886e-01 25.1461 < 2.2e-16 ***
## Gini 6.8225e-01 6.6741e-01 1.0222 0.308130
## Miskin -3.5281e-02 7.1749e-03 -4.9173 2.066e-06 ***
## RLS 1.3819e-01 4.6210e-02 2.9905 0.003202 **
## UMP 3.8401e-07 5.5079e-08 6.9719 6.719e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dw_test_pdrb <- dwtest(ols_model_pdrb)
print(dw_test_pdrb)
##
## Durbin-Watson test
##
## data: ols_model_pdrb
## DW = 2.1182, p-value = 0.7546
## alternative hypothesis: true autocorrelation is greater than 0
Autokorelasi di OLS Saat Ini → Tidak Masalah Besar
karena model panel nantinya secara alami mengakomodasi struktur error antar waktu dan individu.
Dengan kata lain: kamu tidak perlu melakukan transformasi Prais–Winsten atau Cochrane–Orcutt sekarang.
shapiro_result_pdrb <- shapiro.test(residuals_ols)
print(shapiro_result_pdrb)
##
## Shapiro-Wilk normality test
##
## data: residuals_ols
## W = 0.94557, p-value = 3.259e-06
# Histogram dan QQ plot
ggplot(data.frame(Residual = residuals_ols), aes(x = Residual)) +
geom_histogram(aes(y = ..density..), fill = "skyblue3", color = "white", bins = 20) +
geom_density(color = "red", size = 1) +
labs(title = "Distribusi Residual (Y = PDRB)") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
qqnorm(residuals_ols, col = "steelblue")
qqline(residuals_ols, col = "red", lwd = 2)
Menurut Gujarati (2003, 2011) dan Wooldridge (2016):
Normalitas residual bukan asumsi utama BLUE (Best Linear Unbiased Estimator).
Asumsi normalitas hanya diperlukan jika kamu ingin:
Melakukan inferensi statistik dengan n kecil, atau
Menilai akurasi prediksi probabilistik (misal confidence interval).
Karena datamu adalah panel data (gabungan cross-section × time series), biasanya ukuran sampel cukup besar → pelanggaran normalitas tidak terlalu serius. Model panel nanti juga memperbaiki struktur error antar waktu & individu, sehingga residualnya bisa lebih mendekati normal.
library(plm)
library(lmtest)
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(ggplot2)
library(dplyr)
library(car)
# struktur panel
pdata <- pdata.frame(df_cleaned, index = c("Provinsi", "Tahun"))
# PEMBUATAN MODEL REGRESI PANEL
# Common Effect Model (CEM)
cem <- plm(PDRB ~ Gini + Miskin + RLS + UMP, data = pdata, model = "pooling")
# Fixed Effect Model (FEM) - two-ways
fem <- plm(PDRB ~ Gini + Miskin + RLS + UMP,
data = pdata, model = "within", effect = "twoways")
# Random Effect Model (REM) - Amemiya
rem <- plm(PDRB ~ Gini + Miskin + RLS + UMP,
data = pdata, model = "random", effect = "twoways", random.method = "amemiya")
# Estimasi
summary(cem)
## Pooling Model
##
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata,
## model = "pooling")
##
## Unbalanced Panel: n = 38, T = 1-5, N = 174
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.766287 -0.225966 -0.073478 0.154831 0.988896
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 8.7726e+00 4.0781e-01 21.5114 < 2.2e-16 ***
## Gini 6.8225e-01 6.9486e-01 0.9819 0.327576
## Miskin -3.5281e-02 6.2505e-03 -5.6445 6.864e-08 ***
## RLS 1.3819e-01 3.7420e-02 3.6929 0.000299 ***
## UMP 3.8401e-07 5.1950e-08 7.3918 6.390e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 45.486
## Residual Sum of Squares: 23.953
## R-Squared: 0.4734
## Adj. R-Squared: 0.46093
## F-statistic: 37.9811 on 4 and 169 DF, p-value: < 2.22e-16
summary(fem)
## Twoways effects Within Model
##
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata,
## effect = "twoways", model = "within")
##
## Unbalanced Panel: n = 38, T = 1-5, N = 174
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.2304267 -0.0189146 -0.0022826 0.0204327 0.2524991
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Gini 7.3041e-02 6.5892e-01 0.1109 0.9119
## Miskin -1.1522e-02 1.7816e-02 -0.6467 0.5190
## RLS -1.3938e-02 3.4165e-02 -0.4080 0.6840
## UMP 5.6548e-07 1.3148e-07 4.3009 3.345e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.60551
## Residual Sum of Squares: 0.51934
## R-Squared: 0.14231
## Adj. R-Squared: -0.15922
## F-statistic: 5.30959 on 4 and 128 DF, p-value: 0.00054671
summary(rem)
## Twoways effects Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = PDRB ~ Gini + Miskin + RLS + UMP, data = pdata,
## effect = "twoways", model = "random", random.method = "amemiya")
##
## Unbalanced Panel: n = 38, T = 1-5, N = 174
##
## Effects:
## var std.dev share
## idiosyncratic 0.004057 0.063697 0.023
## individual 0.166341 0.407849 0.962
## time 0.002579 0.050788 0.015
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## id 0.8456916 0.9303244 0.9303244 0.9283788 0.9303244 0.9303244
## time 0.7897202 0.7897202 0.7897202 0.7921030 0.7897202 0.8006307
## total 0.7692689 0.7863292 0.7863292 0.7879972 0.7863292 0.7968724
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.2731 -0.2614 -0.0332 0.0336 0.1961 0.9487
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0127e+01 6.8441e+00 1.4796 0.1390
## Gini 2.0916e-01 9.5307e+00 0.0219 0.9825
## Miskin -3.5472e-02 1.6575e-01 -0.2140 0.8305
## RLS -2.6507e-02 4.2009e-01 -0.0631 0.9497
## UMP 4.8955e-07 1.2212e-06 0.4009 0.6885
##
## Total Sum of Squares: 45.486
## Residual Sum of Squares: 27.515
## R-Squared: 0.39962
## Adj. R-Squared: 0.38541
## Chisq: 0.222023 on 4 DF, p-value: 0.99428
# PEMILIHAN MODEL
# Chow test → pooled vs fixed
pFtest(fem, cem)
##
## F test for twoways effects
##
## data: PDRB ~ Gini + Miskin + RLS + UMP
## F = 140.87, df1 = 41, df2 = 128, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Hausman test → fixed vs random
phtest(fem, rem)
##
## Hausman Test
##
## data: PDRB ~ Gini + Miskin + RLS + UMP
## chisq = 0.03228, df = 4, p-value = 0.9999
## alternative hypothesis: one model is inconsistent
# LM test → pooled vs random
plmtest(cem, type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: PDRB ~ Gini + Miskin + RLS + UMP
## chisq = 265.92, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
Model terbaik untuk menjelaskan PDRB adalah Random Effect Model (REM).
library(ggplot2)
df_cleaned$fitted_REM <- fitted(rem)
ggplot(df_cleaned, aes(x = PDRB, y = fitted_REM)) +
geom_point(color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "darkred") +
labs(title = "Fitted vs Actual (REM Model)",
x = "Nilai Aktual ln(PDRB)",
y = "Nilai Prediksi (Fitted)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Model REM melanggar tiga asumsi utama → terdapat heteroskedastisitas,
autokorelasi, dan cross-section dependence. Namun, ini biasa terjadi
pada data panel ekonomi makro antar provinsi seperti PDRB.