# ADN ----

# Number of missing values in each column of the raw ADN panel.
data_ADN %>%
  is.na() %>%
  colSums()
## Provinsi    Tahun        Y       X1       X2       X3       X4       X5 
##        0        0        0        0        0        0        0        0
# Predictor columns that receive a log transform.
pred_use <- paste0("X", 1:5)

# Work on a copy so the raw `data_ADN` is left untouched.
df_ADN2 <- data_ADN

# Response (count data): log(Y + 1), which stays finite when Y is 0.
df_ADN2[["t_Y"]] <- log(df_ADN2[["Y"]] + 1)

# Predictors: a strictly positive column is logged as-is; a column containing
# zero or negative values is first shifted so that its minimum becomes 1.
for (col in pred_use) {
  x <- df_ADN2[[col]]
  lowest <- min(x, na.rm = TRUE)

  df_ADN2[[paste0("t_", col)]] <- if (lowest > 0) {
    log(x)
  } else {
    log(x + (abs(lowest) + 1))
  }
}

# Keep complete rows only.
df_ADN2 <- na.omit(df_ADN2)
# Correlation heat map (lower triangle, coefficients printed) of the five
# log-transformed ADN predictors.
ggcorrplot(
  cor(select(df_ADN2, t_X1, t_X2, t_X3, t_X4, t_X5)),
  type = "lower",
  lab = TRUE
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggcorrplot package.
##   Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Correlation heat map of the untransformed ADN variables (Y and X1-X5).
# The two panel identifiers are dropped by name instead of by position, so the
# intent is explicit and the plot stays correct if the column order changes.
data_ADN %>%
  select(-c(Provinsi, Tahun)) %>%
  cor() %>%
  ggcorrplot(type = "lower", lab = TRUE)

# Variance inflation factors of the transformed predictors, taken from an
# ordinary lm() fit of the model (multicollinearity screen).
vif(lm(t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5, data = df_ADN2))
##     t_X1     t_X2     t_X3     t_X4     t_X5 
## 1.095283 1.794282 1.851701 1.159497 1.024614
# Three candidate panel specifications for ADN, all indexed by province
# (`Provinsi`) and year (`Tahun`).

# Pooling fit (common-effect model).
cem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = df_ADN2, model = "pooling", index = c("Provinsi", "Tahun")
)

# Within fit (fixed-effects model).
fem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = df_ADN2, model = "within", index = c("Provinsi", "Tahun")
)

# Random-effects fit.
rem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = df_ADN2, model = "random", index = c("Provinsi", "Tahun")
)

# F test of the pooling fit against the within fit.
pooltest(cem, fem)
## 
##  F statistic
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## F = 3.0704, df1 = 9, df2 = 55, p-value = 0.004726
## alternative hypothesis: unstability
# Hausman test: within (fixed-effects) fit against the random-effects fit.
phtest(fem, rem)
## 
##  Hausman Test
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## chisq = 3.2088, df = 5, p-value = 0.6678
## alternative hypothesis: one model is inconsistent

# The recorded Hausman p-value (0.6678) does not reject at the 5% level, so
# the random-effects specification is refitted as the final ADN model.
rem.final <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = df_ADN2, model = "random", index = c("Provinsi", "Tahun")
)

# Coefficients, variance components and fit statistics of the final model.
summary(rem.final)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5, data = df_ADN2, 
##     model = "random", index = c("Provinsi", "Tahun"))
## 
## Balanced Panel: n = 10, T = 7, N = 70
## 
## Effects:
##                  var std.dev share
## idiosyncratic 0.2806  0.5297 0.649
## individual    0.1515  0.3893 0.351
## theta: 0.5426
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -1.15924 -0.32467  0.07931  0.35244  1.16264 
## 
## Coefficients:
##              Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept) -5.757837   2.969785 -1.9388 0.0525250 .  
## t_X1        -0.014967   0.053812 -0.2781 0.7809021    
## t_X2         0.407158   0.112771  3.6105 0.0003056 ***
## t_X3         0.379748   0.375461  1.0114 0.3118157    
## t_X4         0.431680   0.088390  4.8838 1.041e-06 ***
## t_X5         0.669994   0.326114  2.0545 0.0399294 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    32.174
## Residual Sum of Squares: 17.548
## R-Squared:      0.4546
## Adj. R-Squared: 0.41199
## Chisq: 53.3456 on 5 DF, p-value: 2.8562e-10

# KHP ----

# Number of missing values in each column of the raw KHP panel.
data_KHP %>%
  is.na() %>%
  colSums()
## Provinsi    Tahun        Y       X1       X2       X3       X4       X5 
##        0        0        0        0        0        0        0        0
# Predictor columns that receive a log transform.
pred_use <- paste0("X", 1:5)

# Work on a copy so the raw `data_KHP` is left untouched.
data_KHP2 <- data_KHP

# Response (count data): log(Y + 1), which stays finite when Y is 0.
data_KHP2[["t_Y"]] <- log(data_KHP2[["Y"]] + 1)

# Predictors: a strictly positive column is logged as-is; a column containing
# zero or negative values is first shifted so that its minimum becomes 1.
for (col in pred_use) {
  x <- data_KHP2[[col]]
  lowest <- min(x, na.rm = TRUE)

  data_KHP2[[paste0("t_", col)]] <- if (lowest > 0) {
    log(x)
  } else {
    log(x + (abs(lowest) + 1))
  }
}

# Keep complete rows only.
data_KHP2 <- na.omit(data_KHP2)
# Correlation heat map (lower triangle, coefficients printed) of the five
# log-transformed KHP predictors.
ggcorrplot(
  cor(select(data_KHP2, t_X1, t_X2, t_X3, t_X4, t_X5)),
  type = "lower",
  lab = TRUE
)

# Correlation heat map of the untransformed KHP variables (Y and X1-X5).
# The two panel identifiers are dropped by name instead of by position, so the
# intent is explicit and the plot stays correct if the column order changes.
data_KHP %>%
  select(-c(Provinsi, Tahun)) %>%
  cor() %>%
  ggcorrplot(type = "lower", lab = TRUE)

# Variance inflation factors of the transformed predictors, taken from an
# ordinary lm() fit of the model (multicollinearity screen).
vif(lm(t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5, data = data_KHP2))
##     t_X1     t_X2     t_X3     t_X4     t_X5 
## 1.332399 1.834779 2.088518 1.159603 1.178061
# Three candidate panel specifications for KHP, all indexed by province
# (`Provinsi`) and year (`Tahun`).

# Pooling fit (common-effect model).
cem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = data_KHP2, model = "pooling", index = c("Provinsi", "Tahun")
)

# Within fit (fixed-effects model).
fem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = data_KHP2, model = "within", index = c("Provinsi", "Tahun")
)

# Random-effects fit.
rem <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = data_KHP2, model = "random", index = c("Provinsi", "Tahun")
)

# F test of the pooling fit against the within fit.
pooltest(cem, fem)
## 
##  F statistic
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## F = 3.0866, df1 = 9, df2 = 55, p-value = 0.004551
## alternative hypothesis: unstability
# Hausman test: within (fixed-effects) fit against the random-effects fit.
phtest(fem, rem)
## 
##  Hausman Test
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## chisq = 5.5712, df = 5, p-value = 0.3502
## alternative hypothesis: one model is inconsistent

# The recorded Hausman p-value (0.3502) does not reject at the 5% level, so
# the random-effects specification is refitted as the final KHP model.
# NOTE: `rem.final` was already assigned in the ADN section; this assignment
# replaces that fit, so later uses of `rem.final` refer to the KHP model.
rem.final <- plm(
  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5,
  data = data_KHP2, model = "random", index = c("Provinsi", "Tahun")
)

# Coefficients, variance components and fit statistics of the final model.
summary(rem.final)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5, data = data_KHP2, 
##     model = "random", index = c("Provinsi", "Tahun"))
## 
## Balanced Panel: n = 10, T = 7, N = 70
## 
## Effects:
##                  var std.dev share
## idiosyncratic 0.2714  0.5209 0.675
## individual    0.1307  0.3615 0.325
## theta: 0.5217
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -1.096134 -0.288488  0.066527  0.347317  1.119853 
## 
## Coefficients:
##              Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept) -7.013334   3.109736 -2.2553  0.024116 *  
## t_X1         0.099003   0.107370  0.9221  0.356494    
## t_X2         0.420673   0.109260  3.8502  0.000118 ***
## t_X3         0.487862   0.379935  1.2841  0.199119    
## t_X4         0.446294   0.088095  5.0661 4.061e-07 ***
## t_X5         0.600017   0.330150  1.8174  0.069154 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    33.127
## Residual Sum of Squares: 17.496
## R-Squared:      0.47186
## Adj. R-Squared: 0.4306
## Chisq: 57.1799 on 5 DF, p-value: 4.6432e-11
# plm is attached here, although plm() is already called earlier in the script.
library(plm)

# Breusch-Godfrey/Wooldridge test for serial correlation in the idiosyncratic
# errors of the final model. `rem.final` was last assigned in the KHP section,
# so this diagnoses the KHP random-effects fit, not the ADN one.
pbgtest(rem.final)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel models
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## chisq = 5.639, df = 7, p-value = 0.5825
## alternative hypothesis: serial correlation in idiosyncratic errors
library(lmtest)

# Studentized Breusch-Pagan test on the same `rem.final` object (the KHP fit
# at this point in the script).
# NOTE(review): bptest() comes from lmtest and is documented for linear-model
# fits; confirm that passing a plm object tests the intended random-effects
# residuals rather than an untransformed pooled regression - TODO confirm.
bptest(rem.final)
## 
##  studentized Breusch-Pagan test
## 
## data:  rem.final
## BP = 3.4432, df = 5, p-value = 0.632
# Pesaran CD test for cross-sectional dependence, run on `rem.final` (the KHP
# fit at this point in the script).
pcdtest(rem.final, test = "cd")
## 
##  Pesaran CD test for cross-sectional dependence in panels
## 
## data:  t_Y ~ t_X1 + t_X2 + t_X3 + t_X4 + t_X5
## z = -0.47588, p-value = 0.6342
## alternative hypothesis: cross-sectional dependence