Sumber konsep utama: Verbeek (2012) dan Greene (2018). Kode berikut siap dijalankan — ganti nama file/data & variabel sesuai kebutuhan.
# Paket inti
library(plm) # model panel (pooled, FE, RE, PGMM)
library(dplyr) # wrangling
library(readr) # read_csv
library(ggplot2) # visualisasi
library(lmtest) # uji BP, BG, dll
library(sandwich) # vcov robust
library(car) # VIF untuk indikasi multikolinearitas
Catatan: Fungsi-fungsi kunci yang dipakai:
pdata.frame(),plm(),pFtest(),plmtest(),phtest(),pgmm(),mtest(),summary(),bptest(),pbgtest(),vcovHC(),coeftest().
Ganti jalur dan nama kolom agar sesuai dengan data Anda. Asumsikan
id = unit (individu/kabupaten/firm), tahun =
waktu.
# Contoh: baca dari CSV (ganti dengan file Anda)
# data <- read_csv("data_panel.csv")
# Alternatif contoh (dataset bawaan plm: 'Produc')
data("Produc", package = "plm")
data <- Produc %>% rename(id = state, tahun = year, y = gsp, x1 = pcap, x2 = hwy)
# Pastikan tidak ada NA pada variabel utama
data <- data %>% filter(!is.na(y), !is.na(x1), !is.na(x2))
# Bentuk panel
pdata <- pdata.frame(data, index = c("id", "tahun"))
str(pdata)
## Classes 'pdata.frame' and 'data.frame': 816 obs. of 11 variables:
## $ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ tahun : Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ region: Factor w/ 9 levels "1","2","3","4",..: 6 6 6 6 6 6 6 6 6 6 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ x1 : 'pseries' Named num 15033 15502 15972 16406 16763 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ x2 : 'pseries' Named num 7326 7526 7765 7908 8026 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ water : 'pseries' Named num 1656 1721 1765 1742 1735 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ util : 'pseries' Named num 6051 6255 6442 6756 7002 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ pc : 'pseries' Named num 35794 37300 38670 40084 42057 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ y : 'pseries' Named int 28418 29375 31303 33430 33749 33604 35764 37463 39964 40979 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ emp : 'pseries' Named num 1010 1022 1072 1136 1170 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ unemp : 'pseries' Named num 4.7 5.2 4.7 3.9 5.5 7.7 6.8 7.4 6.3 7.1 ...
## ..- attr(*, "names")= chr [1:816] "ALABAMA-1970" "ALABAMA-1971" "ALABAMA-1972" "ALABAMA-1973" ...
## ..- attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## .. ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "index")=Classes 'pindex' and 'data.frame': 816 obs. of 2 variables:
## ..$ id : Factor w/ 48 levels "ALABAMA","ARIZONA",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..$ tahun: Factor w/ 17 levels "1970","1971",..: 1 2 3 4 5 6 7 8 9 10 ...
summary(pdata[, c("y", "x1", "x2")])
## y x1 x2
## Min. : 4354 Min. : 2627 Min. : 1827
## 1st Qu.: 16502 1st Qu.: 7097 1st Qu.: 3858
## Median : 39987 Median : 17572 Median : 7556
## Mean : 61014 Mean : 25037 Mean :10218
## 3rd Qu.: 68126 3rd Qu.: 27692 3rd Qu.:11267
## Max. :464550 Max. :140217 Max. :47699
ggplot(as.data.frame(pdata), aes(tahun, y, group = id)) +
geom_line(alpha = 0.3) +
labs(title = "Tren Variabel Y per Unit (Panel)",
x = "Tahun", y = "Y") +
theme_minimal()
mean_time <- as.data.frame(pdata) %>% group_by(tahun) %>% summarise(mean_y = mean(y, na.rm=TRUE))
mean_unit <- as.data.frame(pdata) %>% group_by(id) %>% summarise(mean_y = mean(y, na.rm=TRUE))
head(mean_time); head(mean_unit)
## # A tibble: 6 × 2
## tahun mean_y
## <fct> <dbl>
## 1 1970 48880.
## 2 1971 49990.
## 3 1972 52682.
## 4 1973 55703.
## 5 1974 55309.
## 6 1975 54403.
## # A tibble: 6 × 2
## id mean_y
## <fct> <dbl>
## 1 ALABAMA 38146.
## 2 ARIZONA 31033.
## 3 ARKANSAS 21866.
## 4 CALIFORNIA 350057.
## 5 COLORADO 39417.
## 6 CONNECTICUT 46044.
Bentuk umum (Greene; Verbeek):
\[ y_{it} = \alpha + \beta’x_{it} + u_{it}, \quad u_{it} = \mu_i + \nu_{it} \]
mod_pool <- plm(y ~ x1 + x2, data = pdata, model = "pooling")
summary(mod_pool)
## Pooling Model
##
## Call:
## plm(formula = y ~ x1 + x2, data = pdata, model = "pooling")
##
## Balanced Panel: n = 48, T = 17, N = 816
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -56651.23 -6678.30 451.03 5125.72 139198.36
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -7994.77991 879.12207 -9.094 < 2.2e-16 ***
## x1 1.23625 0.08133 15.200 < 2.2e-16 ***
## x2 3.72440 0.24416 15.254 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 3.9905e+12
## Residual Sum of Squares: 1.8559e+11
## R-Squared: 0.95349
## Adj. R-Squared: 0.95338
## F-statistic: 8334.12 on 2 and 813 DF, p-value: < 2.22e-16
mod_fe <- plm(y ~ x1 + x2, data = pdata, model = "within", effect = "individual")
summary(mod_fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = y ~ x1 + x2, data = pdata, effect = "individual",
## model = "within")
##
## Balanced Panel: n = 48, T = 17, N = 816
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -67752.764 -2054.994 -92.758 1558.202 98943.352
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## x1 3.79605 0.19641 19.3270 < 2.2e-16 ***
## x2 -4.59798 0.75602 -6.0818 1.875e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.3289e+11
## Residual Sum of Squares: 7.4368e+10
## R-Squared: 0.44038
## Adj. R-Squared: 0.40458
## F-statistic: 301.388 on 2 and 766 DF, p-value: < 2.22e-16
# Koefisien dengan standar error robust (Arellano)
coeftest(mod_fe, vcov = vcovHC(mod_fe, type = "HC1", cluster = "group"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## x1 3.7960 1.9585 1.9382 0.05296 .
## x2 -4.5980 6.8662 -0.6697 0.50328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod_re <- plm(y ~ x1 + x2, data = pdata, model = "random", effect = "individual")
summary(mod_re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = y ~ x1 + x2, data = pdata, effect = "individual",
## model = "random")
##
## Balanced Panel: n = 48, T = 17, N = 816
##
## Effects:
## var std.dev share
## idiosyncratic 97086687 9853 0.455
## individual 116278445 10783 0.545
## theta: 0.7836
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -68971.81 -2042.71 -384.38 1583.89 110157.75
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 142.24145 2530.38522 0.0562 0.9552
## x1 2.75684 0.16228 16.9878 <2e-16 ***
## x2 -0.79758 0.50530 -1.5784 0.1145
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 3.1349e+11
## Residual Sum of Squares: 8.6097e+10
## R-Squared: 0.72536
## Adj. R-Squared: 0.72468
## Chisq: 2147.19 on 2 DF, p-value: < 2.22e-16
# FE vs Pooled
u_chow <- pFtest(mod_fe, mod_pool)
# RE vs Pooled
u_lm <- plmtest(mod_pool, type = "bp")
# FE vs RE
u_haus <- phtest(mod_fe, mod_re)
u_chow; u_lm; u_haus
##
## F test for individual effects
##
## data: y ~ x1 + x2
## F = 24.374, df1 = 47, df2 = 766, p-value < 2.2e-16
## alternative hypothesis: significant effects
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: y ~ x1 + x2
## chisq = 1498.6, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
##
## Hausman Test
##
## data: y ~ x1 + x2
## chisq = 88.97, df = 2, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# Heteroskedastisitas (BP) pada FE (gunakan vcov robust bila perlu)
bp_fe <- bptest(mod_fe)
# Autokorelasi panel (BG) pada FE
bg_fe <- pbgtest(mod_fe)
# Multikolinearitas indikatif (gunakan OLS biasa untuk VIF)
vif_ols <- vif(lm(y ~ x1 + x2, data = as.data.frame(pdata)))
bp_fe; bg_fe; vif_ols
##
## studentized Breusch-Pagan test
##
## data: mod_fe
## BP = 204.58, df = 2, p-value < 2.2e-16
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: y ~ x1 + x2
## chisq = 506.21, df = 17, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
## x1 x2
## 18.22553 18.22553
Catatan interpretasi ringkas:
- Jika p-value uji Chow < 0,05 ⇒ FE lebih tepat daripada pooled.
- Jika p-value LM BP < 0,05 ⇒ RE lebih tepat daripada pooled.
- Jika p-value Hausman < 0,05 ⇒ FE konsisten, pilih FE; jika > 0,05 ⇒ RE efisien.
- Jika ada heteroskedastisitas/autokorelasi, gunakan standar error robust viavcovHC()+coeftest().
Bentuk umum:
\[ y_{it} = \rho y_{i,t-1} + \beta’ x_{it} + \mu_i + \nu_{it} \]
Masalah endogenitas pada \(y_{i,t-1}\) diatasi dengan instrumen internal (lag lebih dalam).
pdata <- pdata %>%
group_by(id) %>%
arrange(tahun, .by_group = TRUE) %>%
mutate(y_lag1 = lag(y, 1))
Fungsi
pgmm()ada di paket plm. Sintaks instrumen:|memisahkan regressor dan instrumen.
lag(y, 2:99)artinya gunakan lag ke-2 dst sebagai instrumen (setel sesuai T).
mod_gmm <- pgmm(
formula = y ~ lag(y, 1) + x1 + x2 | lag(y, 2:99),
data = pdata,
effect = "individual",
model = "twosteps",
transformation = "d" # "d" = first-difference (Arellano–Bond); "ld" = system GMM
)
summary(mod_gmm)
## Oneway (individual) effect Two-steps model Difference GMM
##
## Call:
## pgmm(formula = y ~ lag(y, 1) + x1 + x2 | lag(y, 2:99), data = pdata,
## effect = "individual", model = "twosteps", transformation = "d")
##
## Balanced Panel: n = 48, T = 17, N = 816
##
## Number of Observations Used: 720
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -16930.19 -873.19 -27.95 90.27 934.97 23781.74
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## lag(y, 1) 1.0296921 0.0264143 38.9823 <2e-16 ***
## x1 -0.0039313 0.1518586 -0.0259 0.9793
## x2 -0.5882696 0.4192793 -1.4030 0.1606
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sargan test: chisq(119) = 35.50188 (p-value = 1)
## Autocorrelation test (1): normal = -1.808499 (p-value = 0.070529)
## Autocorrelation test (2): normal = -3.038039 (p-value = 0.0023812)
## Wald test for coefficients: chisq(3) = 5292.452 (p-value = < 2.22e-16)
# Uji AR(1) dan AR(2)
ar1 <- mtest(mod_gmm, order = 1)
ar2 <- mtest(mod_gmm, order = 2)
# Ringkasan Sargan/Hansen sudah ada di summary(mod_gmm)
ar1; ar2
##
## Arellano-Bond autocorrelation test of degree 1
##
## data: y ~ lag(y, 1) + x1 + x2 | lag(y, 2:99)
## normal = -1.8793, p-value = 0.0602
## alternative hypothesis: autocorrelation present
##
## Arellano-Bond autocorrelation test of degree 2
##
## data: y ~ lag(y, 1) + x1 + x2 | lag(y, 2:99)
## normal = -3.06, p-value = 0.002214
## alternative hypothesis: autocorrelation present
Kriteria ringkas:
- AR(1) biasanya signifikan (karena differencing), AR(2) seharusnya tidak signifikan (p-value > 0,05).
- Sargan/Hansen: p-value yang tidak terlalu kecil ⇒ instrumen valid. Hindari terlalu banyak instrumen (overfitting instruments).
pdata.frame().summary(), ggplot(), agregasi rata-rata.plm().pFtest(),
plmtest(type="bp"), phtest().bptest(),
pbgtest(), vcovHC() + coeftest(),
vif().pgmm(); uji
mtest() dan cek Sargan/Hansen pada
summary().fit_panel_static <- function(pdata, fml) {
pool <- plm(fml, pdata, model = "pooling")
fe <- plm(fml, pdata, model = "within")
re <- plm(fml, pdata, model = "random")
list(
pooled = pool,
fe = fe,
re = re,
tests = list(
chow = pFtest(fe, pool),
lm_bp = plmtest(pool, type = "bp"),
hausman = phtest(fe, re)
)
)
}
fit_panel_dynamic <- function(pdata, fml_gmm, effect = "individual",
model = "twosteps", trans = "d") {
mod <- pgmm(fml_gmm, data = pdata, effect = effect,
model = model, transformation = trans)
list(
gmm = mod,
ar1 = mtest(mod, 1),
ar2 = mtest(mod, 2)
)
}