library(plm)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lmtest)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
Data <- read_excel("D:/Fathoni/Projects/Semester 6/ADP/Data rapih.xlsx")
Data
## # A tibble: 306 × 7
## PROVINSI Tahun Y X1 X2 X3 X4
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 2016 22835 7.57 13627 16.7 69.6
## 2 ACEH 2017 23363 6.57 14809 16.9 69.6
## 3 ACEH 2018 24014 6.34 13814 16.0 69.7
## 4 ACEH 2019 24842 6.17 15065 15.3 69.9
## 5 ACEH 2020 25018 6.59 18099 15.0 70.0
## 6 ACEH 2021 25356 6.3 17037 15.3 70.0
## 7 ACEH 2022 26062 6.17 16772 14.6 70.2
## 8 ACEH 2023 26800 6.03 17585 14.4 70.4
## 9 ACEH 2024 27684 5.75 17547 14.2 70.5
## 10 SUMATERA UTARA 2016 32885 5.84 11646 10.4 68.4
## # ℹ 296 more rows
summary(Data)
## PROVINSI Tahun Y X1
## Length:306 Min. :2016 Min. : 11469 Min. : 1.400
## Class :character 1st Qu.:2018 1st Qu.: 26204 1st Qu.: 3.770
## Mode :character Median :2020 Median : 34184 Median : 4.625
## Mean :2020 Mean : 43955 Mean : 5.020
## 3rd Qu.:2022 3rd Qu.: 41848 3rd Qu.: 6.168
## Max. :2024 Max. :201315 Max. :10.950
## X2 X3 X4
## Min. : 9940 Min. : 3.470 Min. :64.34
## 1st Qu.:14631 1st Qu.: 6.320 1st Qu.:68.56
## Median :16834 Median : 8.865 Median :70.17
## Mean :17778 Mean :10.583 Mean :70.10
## 3rd Qu.:20228 3rd Qu.:13.525 3rd Qu.:71.64
## Max. :42354 Max. :28.540 Max. :75.53
str(Data)
## tibble [306 × 7] (S3: tbl_df/tbl/data.frame)
## $ PROVINSI: chr [1:306] "ACEH" "ACEH" "ACEH" "ACEH" ...
## $ Tahun : num [1:306] 2016 2017 2018 2019 2020 ...
## $ Y : num [1:306] 22835 23363 24014 24842 25018 ...
## $ X1 : num [1:306] 7.57 6.57 6.34 6.17 6.59 6.3 6.17 6.03 5.75 5.84 ...
## $ X2 : num [1:306] 13627 14809 13814 15065 18099 ...
## $ X3 : num [1:306] 16.7 16.9 16 15.3 15 ...
## $ X4 : num [1:306] 69.6 69.6 69.7 69.9 70 ...
Data sudah dalam bentuk numeric, sehingga tidak perlu dilakukan konversi tipe data.
sum(is.na(Data))
## [1] 0
ringkasan_waktu <- Data %>%
group_by(Tahun) %>%
summarise(across(c(Y, X1, X2, X3, X4),
list(Mean = ~mean(., na.rm = TRUE),
SD = ~sd(., na.rm = TRUE)),
.names = "{.col}_{.fn}"))
print(ringkasan_waktu)
## # A tibble: 9 × 11
## Tahun Y_Mean Y_SD X1_Mean X1_SD X2_Mean X2_SD X3_Mean X3_SD X4_Mean X4_SD
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 39246. 29891. 4.99 1.93 15115. 3724. 11.5 6.20 69.4 2.66
## 2 2017 40496. 30675. 5.10 1.84 15698. 3618. 11.3 6.01 69.4 2.65
## 3 2018 41998. 31536. 4.80 1.71 15911. 3890. 10.8 5.78 69.6 2.61
## 4 2019 43633. 33172. 4.71 1.56 16653. 3700. 10.5 5.65 69.9 2.55
## 5 2020 42502. 32213. 6.03 2.01 19538. 4722. 10.4 5.44 70.1 2.52
## 6 2021 43638 32892. 5.49 1.82 19423. 4639. 10.8 5.39 70.2 2.51
## 7 2022 45540. 34255. 4.97 1.60 17901. 4181. 10.2 5.25 70.4 2.45
## 8 2023 48179. 36033. 4.61 1.42 19662. 5664. 10.1 5.18 70.8 2.42
## 9 2024 50366. 37743. 4.47 1.32 20105. 4889. 9.57 4.57 71.0 2.36
ggplot(ringkasan_waktu, aes(x = Tahun, y = Y_Mean)) +
geom_line(color = "darkblue", linewidth = 1.2) +
geom_point(size = 3) +
labs(
title = "Tren Rata-rata Pertumbuhan Ekonomi (Y) per Tahun",
subtitle = "Analisis Deskripsi: Perbedaan antar Waktu",
x = "Tahun",
y = "Nilai Y"
) +
theme_minimal()
ringkasan_objek <- Data %>%
group_by(PROVINSI) %>%
summarise(across(c(Y, X1, X2, X3, X4),
list(Mean = ~mean(., na.rm = TRUE),
SD = ~sd(., na.rm = TRUE)),
.names = "{.col}_{.fn}"))
print(ringkasan_objek)
## # A tibble: 34 × 11
## PROVINSI Y_Mean Y_SD X1_Mean X1_SD X2_Mean X2_SD X3_Mean X3_SD X4_Mean
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 2.51e4 1579. 6.39 0.514 16039. 1720. 15.4 0.958 70.0
## 2 BALI 3.51e4 1822. 2.96 1.78 16934. 1357. 4.16 0.287 72.2
## 3 BANTEN 3.71e4 3766. 8.52 1.13 22741. 2219. 5.77 0.512 70.1
## 4 BENGKULU 2.34e4 1500. 3.50 0.294 15934. 1829. 15.2 1.15 69.3
## 5 DI YOGYAKARTA 2.81e4 3261. 3.63 0.655 14445. 1984. 12.1 0.892 75.0
## 6 DKI JAKARTA 1.74e5 16208. 7.31 1.54 29654. 6320. 4.14 0.496 73.1
## 7 GORONTALO 2.40e4 2113. 3.40 0.632 13939. 1176. 16.0 1.14 68.1
## 8 JAMBI 4.24e4 3250. 4.39 0.509 15480. 1651. 7.88 0.542 71.2
## 9 JAWA BARAT 3.07e4 2536. 8.46 1.14 18343. 2173. 7.94 0.659 73.3
## 10 JAWA TENGAH 2.76e4 1739. 5.11 0.732 12191. 1364. 11.5 0.996 74.4
## # ℹ 24 more rows
## # ℹ 1 more variable: X4_SD <dbl>
ggplot(ringkasan_objek, aes(x = fct_reorder(PROVINSI, Y_Mean), y = Y_Mean)) +
geom_col(fill = "steelblue") +
coord_flip() + # Memutar grafik agar nama provinsi terbaca jelas di samping
labs(
title = "Perbandingan Rata-rata Y antar Provinsi",
subtitle = "Eksplorasi Perbedaan antar Objek (Individual Effects)",
x = "Provinsi",
y = "Rata-rata Y"
) +
theme_minimal()
model_rem_full <- plm(Y ~ X1 + X2 + X3 + X4, data = Data, model = "random")
summary(model_rem_full)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y ~ X1 + X2 + X3 + X4, data = Data, model = "random")
##
## Balanced Panel: n = 34, T = 9, N = 306
##
## Effects:
## var std.dev share
## idiosyncratic 14964104 3868 0.033
## individual 433562688 20822 0.967
## theta: 0.9382
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -12266.50 -1926.13 -281.67 1020.55 24787.17
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -2.5580e+05 4.9632e+04 -5.1539 2.552e-07 ***
## X1 -1.0657e+03 3.0678e+02 -3.4739 0.000513 ***
## X2 6.3848e-01 1.3260e-01 4.8152 1.470e-06 ***
## X3 5.4271e+02 3.4048e+02 1.5939 0.110950
## X4 4.1084e+03 6.8596e+02 5.9893 2.107e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 8729400000
## Residual Sum of Squares: 5034200000
## R-Squared: 0.42331
## Adj. R-Squared: 0.41564
## Chisq: 220.943 on 4 DF, p-value: < 2.22e-16
Terlihat bahwa variabel X1, X2, dan X4 memiliki nilai p-value yang signifikan (p < 0.05), Sedangkan variabel X3 tidak signifikan (p > 0.05). Oleh karena itu, model REM yang akan digunakan untuk analisis selanjutnya adalah model yang hanya memasukkan variabel X1, X2, dan X4.
model_rem <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random")
summary(model_rem)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random")
##
## Balanced Panel: n = 34, T = 9, N = 306
##
## Effects:
## var std.dev share
## idiosyncratic 15244909 3904 0.034
## individual 428212456 20693 0.966
## theta: 0.9372
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -12564.07 -1932.68 -331.94 888.43 25308.25
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -2.0017e+05 3.5374e+04 -5.6588 1.524e-08 ***
## X1 -1.1277e+03 3.0561e+02 -3.6898 0.0002244 ***
## X2 6.8672e-01 1.2994e-01 5.2848 1.258e-07 ***
## X4 3.3891e+03 5.1833e+02 6.5385 6.215e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared: 0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16
residuals_rem <- resid(model_rem)
shapiro_test <- shapiro.test(residuals_rem)
print(shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: residuals_rem
## W = 0.86497, p-value = 1.061e-15
Terlihat bahwa nilai p-value dari uji Shapiro-Wilk adalah 0.000, yang menunjukkan bahwa residual tidak berdistribusi normal.
bptest(model_rem)
##
## studentized Breusch-Pagan test
##
## data: model_rem
## BP = 95.742, df = 3, p-value < 2.2e-16
Nilai p-value dari uji Breusch-Pagan adalah 0.000, yang menunjukkan adanya heteroskedastisitas dalam model. Ragam residual tidak konstan, sehingga menyebabkan hasil estimasi menjadi tidak efisien.
pdwtest(model_rem)
##
## Durbin-Watson test for serial correlation in panel models
##
## data: Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
pbgtest(model_rem)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: Y ~ X1 + X2 + X4
## chisq = 113.13, df = 9, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
Nilai p-value dari uji Durbin-Watson adalah 0.000, yang menunjukkan adanya autokorelasi positif dalam model. Hal ini berarti bahwa residual pada suatu periode cenderung berkorelasi dengan residual pada periode sebelumnya, sehingga hasil estimasi menjadi tidak efisien.
Model Random Effects (REM) menunjukkan bahwa variabel X1, X2, X3, dan X4 memiliki pengaruh signifikan terhadap variabel dependen Y. Namun, model ini tidak memenuhi asumsi normalitas, homoskedastisitas, dan tidak adanya autokorelasi. Oleh karena itu, hasil estimasi dari model REM mungkin tidak efisien dan perlu dilakukan perbaikan model atau menggunakan metode estimasi yang lebih sesuai untuk mengatasi masalah tersebut.
model_lm <- lm(Y ~ X1 + X2 + X3 + X4, data = Data)
boxcox_result <- boxcox(model_lm,
lambda = seq(-2,2,0.1))
lambda_opt <- boxcox_result$x[which.max(boxcox_result$y)]
lambda_opt
## [1] -0.3838384
Lambda optimal yang diperoleh dari uji Box-Cox adalah -0.3838384, yang menunjukkan bahwa transformasi logaritma dapat digunakan untuk memperbaiki model. Oleh karena itu, model REM dengan transformasi log pada variabel dependen Y akan dibuat untuk mengatasi pelanggaran asumsi.
lambda <- -0.3838384
data <- Data %>%
mutate(Y_trans = (Y^lambda - 1) / lambda)
data_panel <- pdata.frame(data, index = c("PROVINSI","Tahun"))
model_rem_trans <- plm(Y_trans ~ X1 + X2 + X4,
data = data_panel,
model = "random")
summary(model_rem_trans)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y_trans ~ X1 + X2 + X4, data = data_panel, model = "random")
##
## Balanced Panel: n = 34, T = 9, N = 306
##
## Effects:
## var std.dev share
## idiosyncratic 1.691e-06 1.300e-03 0.049
## individual 3.286e-05 5.732e-03 0.951
## theta: 0.9246
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -5.0628e-03 -7.1758e-04 -5.1358e-05 5.0266e-04 6.9885e-03
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 2.4231e+00 1.1433e-02 211.9494 < 2e-16 ***
## X1 -2.1116e-04 1.0119e-04 -2.0869 0.03690 *
## X2 8.0835e-08 4.2612e-08 1.8970 0.05782 .
## X4 1.9219e-03 1.6775e-04 11.4570 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 0.0010908
## Residual Sum of Squares: 0.00056493
## R-Squared: 0.48212
## Adj. R-Squared: 0.47697
## Chisq: 281.141 on 3 DF, p-value: < 2.22e-16
model_gls <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random", method = "gls")
summary(model_gls)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random",
## method = "gls")
##
## Balanced Panel: n = 34, T = 9, N = 306
##
## Effects:
## var std.dev share
## idiosyncratic 15244909 3904 0.034
## individual 428212456 20693 0.966
## theta: 0.9372
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -12564.07 -1932.68 -331.94 888.43 25308.25
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -2.0017e+05 3.5374e+04 -5.6588 1.524e-08 ***
## X1 -1.1277e+03 3.0561e+02 -3.6898 0.0002244 ***
## X2 6.8672e-01 1.2994e-01 5.2848 1.258e-07 ***
## X4 3.3891e+03 5.1833e+02 6.5385 6.215e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared: 0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16
model_fgls <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random", method = "fgls")
summary(model_fgls)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random",
## method = "fgls")
##
## Balanced Panel: n = 34, T = 9, N = 306
##
## Effects:
## var std.dev share
## idiosyncratic 15244909 3904 0.034
## individual 428212456 20693 0.966
## theta: 0.9372
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -12564.07 -1932.68 -331.94 888.43 25308.25
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -2.0017e+05 3.5374e+04 -5.6588 1.524e-08 ***
## X1 -1.1277e+03 3.0561e+02 -3.6898 0.0002244 ***
## X2 6.8672e-01 1.2994e-01 5.2848 1.258e-07 ***
## X4 3.3891e+03 5.1833e+02 6.5385 6.215e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared: 0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16
residuals_log <- resid(model_rem_trans)
shapiro_test_log <- shapiro.test(residuals_log)
print(shapiro_test_log)
##
## Shapiro-Wilk normality test
##
## data: residuals_log
## W = 0.92646, p-value = 3.85e-11
bptest(model_rem_trans)
##
## studentized Breusch-Pagan test
##
## data: model_rem_trans
## BP = 13.74, df = 3, p-value = 0.003282
pdwtest(model_rem_trans)
##
## Durbin-Watson test for serial correlation in panel models
##
## data: Y_trans ~ X1 + X2 + X4
## DW = 0.68808, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
residuals_gls <- resid(model_gls)
shapiro_test_gls <- shapiro.test(residuals_gls)
print(shapiro_test_gls)
##
## Shapiro-Wilk normality test
##
## data: residuals_gls
## W = 0.86497, p-value = 1.061e-15
bptest(model_gls)
##
## studentized Breusch-Pagan test
##
## data: model_gls
## BP = 95.742, df = 3, p-value < 2.2e-16
pdwtest(model_gls)
##
## Durbin-Watson test for serial correlation in panel models
##
## data: Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
residuals_fgls <- resid(model_fgls)
shapiro_test_fgls <- shapiro.test(residuals_fgls)
print(shapiro_test_fgls)
##
## Shapiro-Wilk normality test
##
## data: residuals_fgls
## W = 0.86497, p-value = 1.061e-15
bptest(model_fgls)
##
## studentized Breusch-Pagan test
##
## data: model_fgls
## BP = 95.742, df = 3, p-value < 2.2e-16
pdwtest(model_fgls)
##
## Durbin-Watson test for serial correlation in panel models
##
## data: Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
# Ringkasan Hasil Model
model_summary <- data.frame(
Model = c("REM", "REM Log Transformasi", "GLS", "FGLS"),
Normalitas = c(shapiro_test$p.value, shapiro_test_log$p.value, shapiro_test_gls$p.value, shapiro_test_fgls$p.value),
Homoskedastisitas = c(bptest(model_rem)$p.value, bptest(model_rem_trans)$p.value, bptest(model_gls)$p.value, bptest(model_fgls)$p.value),
Autokorelasi = c(pdwtest(model_rem)$p.value, pdwtest(model_rem_trans)$p.value, pdwtest(model_gls)$p.value, pdwtest(model_fgls)$p.value)
)
print(model_summary)
## Model Normalitas Homoskedastisitas Autokorelasi
## 1 REM 1.060872e-15 1.278636e-20 2.599866e-22
## 2 REM Log Transformasi 3.849606e-11 3.281698e-03 3.152353e-31
## 3 GLS 1.060872e-15 1.278636e-20 2.599866e-22
## 4 FGLS 1.060872e-15 1.278636e-20 2.599866e-22
Berdasarkan hasil pengujian asumsi pada model REM, REM dengan transformasi log, GLS, dan FGLS, dapat disimpulkan bahwa tidak ada model yang sepenuhnya memenuhi semua asumsi klasik. Namun, model REM dengan transformasi log menunjukkan perbaikan signifikan dalam hal normalitas dan homoskedastisitas dibandingkan dengan model REM asli. Model GLS dan FGLS juga menunjukkan perbaikan, tetapi masih terdapat pelanggaran asumsi autokorelasi. Oleh karena itu, model REM dengan transformasi log dapat dianggap sebagai model yang lebih baik untuk analisis data panel ini, meskipun tetap perlu diinterpretasikan dengan hati-hati mengingat adanya pelanggaran asumsi lainnya.