Input Data

library(plm)
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lmtest)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
Data <- read_excel("D:/Fathoni/Projects/Semester 6/ADP/Data rapih.xlsx")
Data
## # A tibble: 306 × 7
##    PROVINSI       Tahun     Y    X1    X2    X3    X4
##    <chr>          <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 ACEH            2016 22835  7.57 13627  16.7  69.6
##  2 ACEH            2017 23363  6.57 14809  16.9  69.6
##  3 ACEH            2018 24014  6.34 13814  16.0  69.7
##  4 ACEH            2019 24842  6.17 15065  15.3  69.9
##  5 ACEH            2020 25018  6.59 18099  15.0  70.0
##  6 ACEH            2021 25356  6.3  17037  15.3  70.0
##  7 ACEH            2022 26062  6.17 16772  14.6  70.2
##  8 ACEH            2023 26800  6.03 17585  14.4  70.4
##  9 ACEH            2024 27684  5.75 17547  14.2  70.5
## 10 SUMATERA UTARA  2016 32885  5.84 11646  10.4  68.4
## # ℹ 296 more rows

Eksplorasi

summary(Data)
##    PROVINSI             Tahun            Y                X1        
##  Length:306         Min.   :2016   Min.   : 11469   Min.   : 1.400  
##  Class :character   1st Qu.:2018   1st Qu.: 26204   1st Qu.: 3.770  
##  Mode  :character   Median :2020   Median : 34184   Median : 4.625  
##                     Mean   :2020   Mean   : 43955   Mean   : 5.020  
##                     3rd Qu.:2022   3rd Qu.: 41848   3rd Qu.: 6.168  
##                     Max.   :2024   Max.   :201315   Max.   :10.950  
##        X2              X3               X4       
##  Min.   : 9940   Min.   : 3.470   Min.   :64.34  
##  1st Qu.:14631   1st Qu.: 6.320   1st Qu.:68.56  
##  Median :16834   Median : 8.865   Median :70.17  
##  Mean   :17778   Mean   :10.583   Mean   :70.10  
##  3rd Qu.:20228   3rd Qu.:13.525   3rd Qu.:71.64  
##  Max.   :42354   Max.   :28.540   Max.   :75.53
str(Data)
## tibble [306 × 7] (S3: tbl_df/tbl/data.frame)
##  $ PROVINSI: chr [1:306] "ACEH" "ACEH" "ACEH" "ACEH" ...
##  $ Tahun   : num [1:306] 2016 2017 2018 2019 2020 ...
##  $ Y       : num [1:306] 22835 23363 24014 24842 25018 ...
##  $ X1      : num [1:306] 7.57 6.57 6.34 6.17 6.59 6.3 6.17 6.03 5.75 5.84 ...
##  $ X2      : num [1:306] 13627 14809 13814 15065 18099 ...
##  $ X3      : num [1:306] 16.7 16.9 16 15.3 15 ...
##  $ X4      : num [1:306] 69.6 69.6 69.7 69.9 70 ...

Data sudah dalam bentuk numeric, sehingga tidak perlu dilakukan konversi tipe data.

Cek missing value

sum(is.na(Data))
## [1] 0

Ringkasan Per Tahun

ringkasan_waktu <- Data %>%
  group_by(Tahun) %>%
  summarise(across(c(Y, X1, X2, X3, X4), 
                   list(Mean = ~mean(., na.rm = TRUE), 
                        SD = ~sd(., na.rm = TRUE)), 
                   .names = "{.col}_{.fn}"))

print(ringkasan_waktu)
## # A tibble: 9 × 11
##   Tahun Y_Mean   Y_SD X1_Mean X1_SD X2_Mean X2_SD X3_Mean X3_SD X4_Mean X4_SD
##   <dbl>  <dbl>  <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl> <dbl>
## 1  2016 39246. 29891.    4.99  1.93  15115. 3724.   11.5   6.20    69.4  2.66
## 2  2017 40496. 30675.    5.10  1.84  15698. 3618.   11.3   6.01    69.4  2.65
## 3  2018 41998. 31536.    4.80  1.71  15911. 3890.   10.8   5.78    69.6  2.61
## 4  2019 43633. 33172.    4.71  1.56  16653. 3700.   10.5   5.65    69.9  2.55
## 5  2020 42502. 32213.    6.03  2.01  19538. 4722.   10.4   5.44    70.1  2.52
## 6  2021 43638  32892.    5.49  1.82  19423. 4639.   10.8   5.39    70.2  2.51
## 7  2022 45540. 34255.    4.97  1.60  17901. 4181.   10.2   5.25    70.4  2.45
## 8  2023 48179. 36033.    4.61  1.42  19662. 5664.   10.1   5.18    70.8  2.42
## 9  2024 50366. 37743.    4.47  1.32  20105. 4889.    9.57  4.57    71.0  2.36
ggplot(ringkasan_waktu, aes(x = Tahun, y = Y_Mean)) +
  geom_line(color = "darkblue", linewidth = 1.2) +
  geom_point(size = 3) +
  labs(
    title = "Tren Rata-rata Pertumbuhan Ekonomi (Y) per Tahun",
    subtitle = "Analisis Deskripsi: Perbedaan antar Waktu",
    x = "Tahun",
    y = "Nilai Y"
  ) +
  theme_minimal()

Ringkasan Per Provinsi

ringkasan_objek <- Data %>%
  group_by(PROVINSI) %>%
  summarise(across(c(Y, X1, X2, X3, X4), 
                   list(Mean = ~mean(., na.rm = TRUE), 
                        SD = ~sd(., na.rm = TRUE)), 
                   .names = "{.col}_{.fn}"))

print(ringkasan_objek)
## # A tibble: 34 × 11
##    PROVINSI      Y_Mean   Y_SD X1_Mean X1_SD X2_Mean X2_SD X3_Mean X3_SD X4_Mean
##    <chr>          <dbl>  <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl> <dbl>   <dbl>
##  1 ACEH          2.51e4  1579.    6.39 0.514  16039. 1720.   15.4  0.958    70.0
##  2 BALI          3.51e4  1822.    2.96 1.78   16934. 1357.    4.16 0.287    72.2
##  3 BANTEN        3.71e4  3766.    8.52 1.13   22741. 2219.    5.77 0.512    70.1
##  4 BENGKULU      2.34e4  1500.    3.50 0.294  15934. 1829.   15.2  1.15     69.3
##  5 DI YOGYAKARTA 2.81e4  3261.    3.63 0.655  14445. 1984.   12.1  0.892    75.0
##  6 DKI JAKARTA   1.74e5 16208.    7.31 1.54   29654. 6320.    4.14 0.496    73.1
##  7 GORONTALO     2.40e4  2113.    3.40 0.632  13939. 1176.   16.0  1.14     68.1
##  8 JAMBI         4.24e4  3250.    4.39 0.509  15480. 1651.    7.88 0.542    71.2
##  9 JAWA BARAT    3.07e4  2536.    8.46 1.14   18343. 2173.    7.94 0.659    73.3
## 10 JAWA TENGAH   2.76e4  1739.    5.11 0.732  12191. 1364.   11.5  0.996    74.4
## # ℹ 24 more rows
## # ℹ 1 more variable: X4_SD <dbl>
ggplot(ringkasan_objek, aes(x = fct_reorder(PROVINSI, Y_Mean), y = Y_Mean)) +
  geom_col(fill = "steelblue") +
  coord_flip() + # Memutar grafik agar nama provinsi terbaca jelas di samping
  labs(
    title = "Perbandingan Rata-rata Y antar Provinsi",
    subtitle = "Eksplorasi Perbedaan antar Objek (Individual Effects)",
    x = "Provinsi",
    y = "Rata-rata Y"
  ) +
  theme_minimal()

Pemodelan REM

Peubah signifikan

model_rem_full <- plm(Y ~ X1 + X2 + X3 + X4, data = Data, model = "random")
summary(model_rem_full)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = Y ~ X1 + X2 + X3 + X4, data = Data, model = "random")
## 
## Balanced Panel: n = 34, T = 9, N = 306
## 
## Effects:
##                     var   std.dev share
## idiosyncratic  14964104      3868 0.033
## individual    433562688     20822 0.967
## theta: 0.9382
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -12266.50  -1926.13   -281.67   1020.55  24787.17 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept) -2.5580e+05  4.9632e+04 -5.1539 2.552e-07 ***
## X1          -1.0657e+03  3.0678e+02 -3.4739  0.000513 ***
## X2           6.3848e-01  1.3260e-01  4.8152 1.470e-06 ***
## X3           5.4271e+02  3.4048e+02  1.5939  0.110950    
## X4           4.1084e+03  6.8596e+02  5.9893 2.107e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    8729400000
## Residual Sum of Squares: 5034200000
## R-Squared:      0.42331
## Adj. R-Squared: 0.41564
## Chisq: 220.943 on 4 DF, p-value: < 2.22e-16

Terlihat bahwa variabel X1, X2, dan X4 memiliki nilai p-value yang signifikan (p < 0.05), Sedangkan variabel X3 tidak signifikan (p > 0.05). Oleh karena itu, model REM yang akan digunakan untuk analisis selanjutnya adalah model yang hanya memasukkan variabel X1, X2, dan X4.

Model REM dengan variabel signifikan

model_rem <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random")


summary(model_rem)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random")
## 
## Balanced Panel: n = 34, T = 9, N = 306
## 
## Effects:
##                     var   std.dev share
## idiosyncratic  15244909      3904 0.034
## individual    428212456     20693 0.966
## theta: 0.9372
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -12564.07  -1932.68   -331.94    888.43  25308.25 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept) -2.0017e+05  3.5374e+04 -5.6588 1.524e-08 ***
## X1          -1.1277e+03  3.0561e+02 -3.6898 0.0002244 ***
## X2           6.8672e-01  1.2994e-01  5.2848 1.258e-07 ***
## X4           3.3891e+03  5.1833e+02  6.5385 6.215e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared:      0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16

Pengecekan Asumsi

Normalitas

residuals_rem <- resid(model_rem)
shapiro_test <- shapiro.test(residuals_rem)
print(shapiro_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals_rem
## W = 0.86497, p-value = 1.061e-15

Terlihat bahwa nilai p-value dari uji Shapiro-Wilk adalah 0.000, yang menunjukkan bahwa residual tidak berdistribusi normal.

Homoskedastisitas

bptest(model_rem)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_rem
## BP = 95.742, df = 3, p-value < 2.2e-16

Nilai p-value dari uji Breusch-Pagan adalah 0.000, yang menunjukkan adanya heteroskedastisitas dalam model. Ragam residual tidak konstan, sehingga menyebabkan hasil estimasi menjadi tidak efisien.

Auto Korelasi

pdwtest(model_rem)
## 
##  Durbin-Watson test for serial correlation in panel models
## 
## data:  Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
pbgtest(model_rem)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel models
## 
## data:  Y ~ X1 + X2 + X4
## chisq = 113.13, df = 9, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors

Nilai p-value dari uji Durbin-Watson adalah 0.000, yang menunjukkan adanya autokorelasi positif dalam model. Hal ini berarti bahwa residual pada suatu periode cenderung berkorelasi dengan residual pada periode sebelumnya, sehingga hasil estimasi menjadi tidak efisien.

Kesimpulan Sementara

Model Random Effects (REM) menunjukkan bahwa variabel X1, X2, X3, dan X4 memiliki pengaruh signifikan terhadap variabel dependen Y. Namun, model ini tidak memenuhi asumsi normalitas, homoskedastisitas, dan tidak adanya autokorelasi. Oleh karena itu, hasil estimasi dari model REM mungkin tidak efisien dan perlu dilakukan perbaikan model atau menggunakan metode estimasi yang lebih sesuai untuk mengatasi masalah tersebut.

Penanganan Pelanggaran Asumsi

Transformasi Variabel

model_lm <- lm(Y ~ X1 + X2 + X3 + X4, data = Data)

boxcox_result <- boxcox(model_lm,
                        lambda = seq(-2,2,0.1))

lambda_opt <- boxcox_result$x[which.max(boxcox_result$y)]

lambda_opt
## [1] -0.3838384

Lambda optimal yang diperoleh dari uji Box-Cox adalah -0.3838384, yang menunjukkan bahwa transformasi logaritma dapat digunakan untuk memperbaiki model. Oleh karena itu, model REM dengan transformasi log pada variabel dependen Y akan dibuat untuk mengatasi pelanggaran asumsi.

Transformasi

lambda <- -0.3838384

data <- Data %>%
  mutate(Y_trans = (Y^lambda - 1) / lambda)

Bentuk Ulang Data Panel

data_panel <- pdata.frame(data, index = c("PROVINSI","Tahun"))

Buat Model REM dengan Transformasi Log

model_rem_trans <- plm(Y_trans ~ X1 + X2 + X4,
                       data = data_panel,
                       model = "random")

summary(model_rem_trans)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = Y_trans ~ X1 + X2 + X4, data = data_panel, model = "random")
## 
## Balanced Panel: n = 34, T = 9, N = 306
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 1.691e-06 1.300e-03 0.049
## individual    3.286e-05 5.732e-03 0.951
## theta: 0.9246
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -5.0628e-03 -7.1758e-04 -5.1358e-05  5.0266e-04  6.9885e-03 
## 
## Coefficients:
##                Estimate  Std. Error  z-value Pr(>|z|)    
## (Intercept)  2.4231e+00  1.1433e-02 211.9494  < 2e-16 ***
## X1          -2.1116e-04  1.0119e-04  -2.0869  0.03690 *  
## X2           8.0835e-08  4.2612e-08   1.8970  0.05782 .  
## X4           1.9219e-03  1.6775e-04  11.4570  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    0.0010908
## Residual Sum of Squares: 0.00056493
## R-Squared:      0.48212
## Adj. R-Squared: 0.47697
## Chisq: 281.141 on 3 DF, p-value: < 2.22e-16

Model Alternatif: Generalized Least Squares (GLS)

model_gls <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random", method = "gls")

summary(model_gls)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random", 
##     method = "gls")
## 
## Balanced Panel: n = 34, T = 9, N = 306
## 
## Effects:
##                     var   std.dev share
## idiosyncratic  15244909      3904 0.034
## individual    428212456     20693 0.966
## theta: 0.9372
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -12564.07  -1932.68   -331.94    888.43  25308.25 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept) -2.0017e+05  3.5374e+04 -5.6588 1.524e-08 ***
## X1          -1.1277e+03  3.0561e+02 -3.6898 0.0002244 ***
## X2           6.8672e-01  1.2994e-01  5.2848 1.258e-07 ***
## X4           3.3891e+03  5.1833e+02  6.5385 6.215e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared:      0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16

Model Alternatif: Feasible Generalized Least Squares (FGLS)

model_fgls <- plm(Y ~ X1 + X2 + X4, data = Data, model = "random", method = "fgls")
summary(model_fgls)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = Y ~ X1 + X2 + X4, data = Data, model = "random", 
##     method = "fgls")
## 
## Balanced Panel: n = 34, T = 9, N = 306
## 
## Effects:
##                     var   std.dev share
## idiosyncratic  15244909      3904 0.034
## individual    428212456     20693 0.966
## theta: 0.9372
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -12564.07  -1932.68   -331.94    888.43  25308.25 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept) -2.0017e+05  3.5374e+04 -5.6588 1.524e-08 ***
## X1          -1.1277e+03  3.0561e+02 -3.6898 0.0002244 ***
## X2           6.8672e-01  1.2994e-01  5.2848 1.258e-07 ***
## X4           3.3891e+03  5.1833e+02  6.5385 6.215e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    8768200000
## Residual Sum of Squares: 5.107e+09
## R-Squared:      0.41756
## Adj. R-Squared: 0.41177
## Chisq: 216.505 on 3 DF, p-value: < 2.22e-16

Pengujian Model Alternatif

Model Transformasi

Uji Normalitas Residual Model Transformasi

residuals_log <- resid(model_rem_trans)
shapiro_test_log <- shapiro.test(residuals_log)
print(shapiro_test_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals_log
## W = 0.92646, p-value = 3.85e-11

Uji Homoskedastisitas Model Transformasi

bptest(model_rem_trans)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_rem_trans
## BP = 13.74, df = 3, p-value = 0.003282

Uji Autokorelasi Model Transformasi

pdwtest(model_rem_trans)
## 
##  Durbin-Watson test for serial correlation in panel models
## 
## data:  Y_trans ~ X1 + X2 + X4
## DW = 0.68808, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors

Model GLS

Uji Normalitas Residual Model GLS

residuals_gls <- resid(model_gls)
shapiro_test_gls <- shapiro.test(residuals_gls)
print(shapiro_test_gls)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals_gls
## W = 0.86497, p-value = 1.061e-15

Uji Homoskedastisitas Model GLS

bptest(model_gls)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_gls
## BP = 95.742, df = 3, p-value < 2.2e-16

Uji Autokorelasi Model GLS

pdwtest(model_gls)
## 
##  Durbin-Watson test for serial correlation in panel models
## 
## data:  Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors

Model FGLS

Uji Normalitas Residual Model FGLS

residuals_fgls <- resid(model_fgls)
shapiro_test_fgls <- shapiro.test(residuals_fgls)
print(shapiro_test_fgls)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals_fgls
## W = 0.86497, p-value = 1.061e-15

Uji Homoskedastisitas Model FGLS

bptest(model_fgls)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_fgls
## BP = 95.742, df = 3, p-value < 2.2e-16

Uji Autokorelasi Model FGLS

pdwtest(model_fgls)
## 
##  Durbin-Watson test for serial correlation in panel models
## 
## data:  Y ~ X1 + X2 + X4
## DW = 0.90678, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors

Kesimpulan Akhir

# Ringkasan Hasil Model
model_summary <- data.frame(
  Model = c("REM", "REM Log Transformasi", "GLS", "FGLS"),
  Normalitas = c(shapiro_test$p.value, shapiro_test_log$p.value, shapiro_test_gls$p.value, shapiro_test_fgls$p.value),
  Homoskedastisitas = c(bptest(model_rem)$p.value, bptest(model_rem_trans)$p.value, bptest(model_gls)$p.value, bptest(model_fgls)$p.value),
  Autokorelasi = c(pdwtest(model_rem)$p.value, pdwtest(model_rem_trans)$p.value, pdwtest(model_gls)$p.value, pdwtest(model_fgls)$p.value)
)
print(model_summary)
##                  Model   Normalitas Homoskedastisitas Autokorelasi
## 1                  REM 1.060872e-15      1.278636e-20 2.599866e-22
## 2 REM Log Transformasi 3.849606e-11      3.281698e-03 3.152353e-31
## 3                  GLS 1.060872e-15      1.278636e-20 2.599866e-22
## 4                 FGLS 1.060872e-15      1.278636e-20 2.599866e-22

Berdasarkan hasil pengujian asumsi pada model REM, REM dengan transformasi log, GLS, dan FGLS, dapat disimpulkan bahwa tidak ada model yang sepenuhnya memenuhi semua asumsi klasik. Namun, model REM dengan transformasi log menunjukkan perbaikan signifikan dalam hal normalitas dan homoskedastisitas dibandingkan dengan model REM asli. Model GLS dan FGLS juga menunjukkan perbaikan, tetapi masih terdapat pelanggaran asumsi autokorelasi. Oleh karena itu, model REM dengan transformasi log dapat dianggap sebagai model yang lebih baik untuk analisis data panel ini, meskipun tetap perlu diinterpretasikan dengan hati-hati mengingat adanya pelanggaran asumsi lainnya.