# ============================
# Load libraries
# ============================
library(plm) # Grunfeld dataset
## Warning: package 'plm' was built under R version 4.4.3
library(dplyr) # data wrangling
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2) # visualization
## Warning: package 'ggplot2' was built under R version 4.4.3
library(car) # VIF
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest) # bptest, dwtest
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich) # robust SE
## Warning: package 'sandwich' was built under R version 4.4.3
library(ggpubr) # additional plots
## Warning: package 'ggpubr' was built under R version 4.4.3
## Registered S3 method overwritten by 'broom':
## method from
## nobs.felm lfe
# ============================
# Import Dataset
# ============================
library(plm)
data("Grunfeld")
df <- Grunfeld
# Structure of the original data
str(df)
## 'data.frame': 200 obs. of 5 variables:
## $ firm : int 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 ...
## $ inv : num 318 392 411 258 331 ...
## $ value : num 3078 4662 5387 2792 4313 ...
## $ capital: num 2.8 52.6 156.9 209.2 203.4 ...
head(df)
## firm year inv value capital
## 1 1 1935 317.6 3078.5 2.8
## 2 1 1936 391.8 4661.7 52.6
## 3 1 1937 410.6 5387.1 156.9
## 4 1 1938 257.7 2792.2 209.2
## 5 1 1939 330.8 4313.2 203.4
## 6 1 1940 461.2 4643.9 207.2
# Descriptive statistics for all variables
summary(df)
## firm year inv value
## Min. : 1.0 Min. :1935 Min. : 0.93 Min. : 58.12
## 1st Qu.: 3.0 1st Qu.:1940 1st Qu.: 33.56 1st Qu.: 199.97
## Median : 5.5 Median :1944 Median : 57.48 Median : 517.95
## Mean : 5.5 Mean :1944 Mean : 145.96 Mean :1081.68
## 3rd Qu.: 8.0 3rd Qu.:1949 3rd Qu.: 138.04 3rd Qu.:1679.85
## Max. :10.0 Max. :1954 Max. :1486.70 Max. :6241.70
## capital
## Min. : 0.80
## 1st Qu.: 79.17
## Median : 205.60
## Mean : 276.02
## 3rd Qu.: 358.10
## Max. :2226.30
# ============================
# Data Preprocessing
# ============================
# Check for missing values
colSums(is.na(df))
## firm year inv value capital
## 0 0 0 0 0
# Check for duplicates
sum(duplicated(df))
## [1] 0
# Outlier detection (boxplot)
num_vars <- df %>%
  select_if(is.numeric) %>%
  select(-firm, -year)
for (var in names(num_vars)) {
  print(
    ggplot(num_vars, aes(y = .data[[var]])) +
      geom_boxplot(fill = "steelblue", alpha = 0.7) +
      labs(title = paste("Boxplot", var), y = var)
  )
}
# Identify outliers numerically (IQR method)
detect_outliers <- function(x) {
  Q1 <- quantile(x, 0.25, na.rm = TRUE)
  Q3 <- quantile(x, 0.75, na.rm = TRUE)
  IQR <- Q3 - Q1
  lower <- Q1 - 1.5 * IQR
  upper <- Q3 + 1.5 * IQR
  which(x < lower | x > upper)
}
outlier_index <- lapply(num_vars, detect_outliers)
outlier_index
## $inv
## [1] 1 2 3 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 22 23 26 27 28 29
## [26] 32 33 34 35 36 37 38 39 40
##
## $value
## [1] 2 3 5 6 7 9 10 11 12 17 18 19 20
##
## $capital
## [1] 14 15 16 17 18 19 20 59 60 100
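As a quick follow-up (a small sketch, not evaluated here), the flagged indices above can be tallied to see how many observations each variable marks as an outlier:
# Count the IQR-flagged observations per variable and their share of all rows
sapply(outlier_index, length)
round(sapply(outlier_index, length) / nrow(num_vars), 3)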
# Standardization / normalization
num_vars_scaled <- as.data.frame(scale(num_vars))
summary(num_vars_scaled)
## inv value capital
## Min. :-0.66872 Min. :-0.7787 Min. :-0.9140
## 1st Qu.:-0.51827 1st Qu.:-0.6708 1st Qu.:-0.6537
## Median :-0.40795 Median :-0.4289 Median :-0.2339
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.03651 3rd Qu.: 0.4551 3rd Qu.: 0.2726
## Max. : 6.18209 Max. : 3.9256 Max. : 6.4771
# ============================
# Exploratory Data Analysis
# ============================
# Descriptive statistics for the relevant numeric variables
summary(num_vars)
## inv value capital
## Min. : 0.93 Min. : 58.12 Min. : 0.80
## 1st Qu.: 33.56 1st Qu.: 199.97 1st Qu.: 79.17
## Median : 57.48 Median : 517.95 Median : 205.60
## Mean : 145.96 Mean :1081.68 Mean : 276.02
## 3rd Qu.: 138.04 3rd Qu.:1679.85 3rd Qu.: 358.10
## Max. :1486.70 Max. :6241.70 Max. :2226.30
# Correlation among the numeric variables
cor(num_vars)
## inv value capital
## inv 1.0000000 0.8569329 0.6625653
## value 0.8569329 1.0000000 0.4887058
## capital 0.6625653 0.4887058 1.0000000
# Scatterplot matrix with histograms & correlations
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
ggpairs(num_vars, title = "Scatterplot Matrix")
# ============================
# Relationships between variables
# ============================
# investment vs value
ggplot(num_vars, aes(x = value, y = inv)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  labs(title = "Investment vs Value",
       x = "Value", y = "Investment")
## `geom_smooth()` using formula = 'y ~ x'
# investment vs capital
ggplot(num_vars, aes(x = capital, y = inv)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Investment vs Capital",
       x = "Capital", y = "Investment")
## `geom_smooth()` using formula = 'y ~ x'
# ============================
# Multiple Linear Regression (MLR)
# ============================
# Regression model: inv ~ value + capital
model <- lm(inv ~ value + capital, data = num_vars)
# Summary of the regression results
summary(model)
##
## Call:
## lm(formula = inv ~ value + capital, data = num_vars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -291.68 -30.01 5.30 34.83 369.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 9.511676 -4.491 1.21e-05 ***
## value 0.115562 0.005836 19.803 < 2e-16 ***
## capital 0.230678 0.025476 9.055 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.41 on 197 degrees of freedom
## Multiple R-squared: 0.8124, Adjusted R-squared: 0.8105
## F-statistic: 426.6 on 2 and 197 DF, p-value: < 2.2e-16
library(plm) # Grunfeld dataset
library(dplyr)
library(ggplot2)
library(car) # VIF
library(lmtest) # bptest, dwtest, bgtest
library(sandwich) # robust SE (for the tests, not as a remedy here)
library(ggpubr) # visualization
library(GGally)
library(tseries) # jarque.bera.test
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# ============================
# Check the Classical MLR Assumptions
# ============================
# ================================================
# 1. Linearity in parameters
# Additional check: linearity of the X-Y relationship via residual plots
# ================================================
resid_model <- residuals(model)
ggplot(df, aes(x = value, y = resid_model)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_hline(yintercept = 0, color = "red") +
  labs(title = "Residuals vs Value")
ggplot(df, aes(x = capital, y = resid_model)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_hline(yintercept = 0, color = "red") +
  labs(title = "Residuals vs Capital")
# ================================================
# 2. Zero expected error, E(u) = 0
# - implicitly: the mean of the residuals ≈ 0
# ================================================
mean(resid_model)
## [1] 2.882729e-15
# ================================================
# 3. Homoskedasticity
# ================================================
# Plot residuals vs fitted values
plot(fitted(model), resid_model,
     xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted vs Residuals")
abline(h = 0, col = "red", lwd = 2)
# Breusch-Pagan test
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 55.247, df = 2, p-value = 1.008e-12
# White test (alternative form, per Gujarati)
bptest(model, ~ fitted(model) + I(fitted(model)^2))
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 54.692, df = 2, p-value = 1.33e-12
# ================================================
# 4. No autocorrelation
# ================================================
dwtest(model) # Durbin-Watson
##
## Durbin-Watson test
##
## data: model
## DW = 0.35819, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(model, order = 2) # Breusch-Godfrey test (order-2 autocorrelation)
##
## Breusch-Godfrey test for serial correlation of order up to 2
##
## data: model
## LM test = 135.37, df = 2, p-value < 2.2e-16
# ================================================
# 5. Number of observations > number of variables
# ================================================
nrow(df) # number of observations
## [1] 200
length(coef(model)) - 1 # number of predictors
## [1] 2
# ================================================
# 6. Sufficient variation in X
# check that var(value) and var(capital) > 0; if they are nonzero, this assumption holds.
# ================================================
var(df$value)
## [1] 1727831
var(df$capital)
## [1] 90663.56
# ================================================
# 7. Correct model specification
# (use the RESET test as an indication)
# ================================================
resettest(model)
##
## RESET test
##
## data: model
## RESET = 19.56, df1 = 2, df2 = 195, p-value = 1.812e-08
# ================================================
# 8. No multicollinearity
# ================================================
vif(model)
## value capital
## 1.313773 1.313773
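With only two predictors, both VIFs equal 1/(1 - r²), where r is the correlation between value and capital; a quick arithmetic check against the correlation reported earlier (0.4887), shown as a sketch (not evaluated here):
# VIF in a two-predictor model: 1 / (1 - r^2)
r_vc <- cor(df$value, df$capital)
1 / (1 - r_vc^2)   # about 1.3138, matching vif(model) above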
# ================================================
# 9. Normality of the errors
# ================================================
hist(resid_model, breaks = 30, col = "skyblue",
     main = "Residual Histogram", xlab = "Residual")
qqnorm(resid_model); qqline(resid_model, col = "red", lwd = 2)
# Shapiro-Wilk
shapiro.test(resid_model)
##
## Shapiro-Wilk normality test
##
## data: resid_model
## W = 0.8798, p-value = 1.552e-11
# Jarque-Bera
jarque.bera.test(resid_model)
##
## Jarque Bera Test
##
## data: resid_model
## X-squared = 89.572, df = 2, p-value < 2.2e-16
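The Jarque-Bera statistic can be reproduced by hand from the residuals' skewness S and kurtosis K via JB = n/6 · (S² + (K − 3)²/4); a minimal sketch (not evaluated here) using only base R:
# Manual Jarque-Bera computation from the OLS residuals
n <- length(resid_model)
m <- mean(resid_model)
s <- sqrt(mean((resid_model - m)^2))
S <- mean((resid_model - m)^3) / s^3   # skewness
K <- mean((resid_model - m)^4) / s^4   # kurtosis
n / 6 * (S^2 + (K - 3)^2 / 4)          # should be close to the reported 89.572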
# ================================================
# Hildreth–Lu procedure to address autocorrelation
# ================================================
library(lmtest)
# Initial model (OLS)
model_ols <- lm(inv ~ value + capital, data = df)
# Define the Hildreth-Lu transformation function
hildreth.lu.func <- function(r, model){
  X <- model.matrix(model)[, -1]            # predictors (drop the intercept)
  y <- model.response(model.frame(model))   # response
  n <- length(y)
  t <- 2:n
  y <- y[t] - r * y[t-1]                    # quasi-difference the response
  X <- X[t, ] - r * X[t-1, ]                # quasi-difference the predictors
  return(lm(y ~ X))                         # regression on the transformed data
}
# ================================================
# Find the optimal rho via grid search
# ================================================
r <- c(seq(0.1, 0.9, by=0.1), seq(0.91, 0.99, by=0.01))
tab <- data.frame(
  rho = r,
  SSE = sapply(r, function(i){ deviance(hildreth.lu.func(i, model_ols)) })
)
head(tab, 10)
## rho SSE
## 1 0.10 1485132.8
## 2 0.20 1249240.5
## 3 0.30 1048008.7
## 4 0.40 881239.6
## 5 0.50 748699.8
## 6 0.60 650169.9
## 7 0.70 585561.8
## 8 0.80 555178.8
## 9 0.90 560085.2
## 10 0.91 562578.9
# Plot rho vs SSE
plot(tab$rho, tab$SSE, type="l", main="Hildreth–Lu Grid Search", xlab="rho", ylab="SSE")
abline(v = tab$rho[which.min(tab$SSE)], lty=3, col="red")
# Refine around the minimum of the coarse grid
rho_range <- seq(0.70, 0.80, by = 0.01)
tab2 <- data.frame(
  rho = rho_range,
  SSE = sapply(rho_range, function(i){ deviance(hildreth.lu.func(i, model_ols)) })
)
tab2
## rho SSE
## 1 0.70 585561.8
## 2 0.71 580972.8
## 3 0.72 576726.0
## 4 0.73 572822.2
## 5 0.74 569262.1
## 6 0.75 566046.7
## 7 0.76 563176.9
## 8 0.77 560654.0
## 9 0.78 558479.1
## 10 0.79 556653.5
## 11 0.80 555178.8
abline(v = tab2$rho[which.min(tab2$SSE)], lty=3, col="blue")
# ================================================
# Best model at the optimal rho
# ================================================
rho_opt <- tab2$rho[which.min(tab2$SSE)]
model_hl <- hildreth.lu.func(rho_opt, model_ols)
summary(model_hl)
##
## Call:
## lm(formula = y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.144 -21.577 -2.248 9.719 279.808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.563327 4.054134 -2.112 0.0359 *
## Xvalue 0.099549 0.008651 11.507 <2e-16 ***
## Xcapital 0.291743 0.024475 11.920 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53.22 on 196 degrees of freedom
## Multiple R-squared: 0.7646, Adjusted R-squared: 0.7622
## F-statistic: 318.2 on 2 and 196 DF, p-value: < 2.2e-16
# ================================================
# Re-check autocorrelation
# ================================================
dwtest(model_hl)
##
## Durbin-Watson test
##
## data: model_hl
## DW = 1.7705, p-value = 0.05068
## alternative hypothesis: true autocorrelation is greater than 0
# ================================================
# Back-transform to the original model
# ================================================
b0 <- coef(model_hl)[1] / (1 - rho_opt)   # recover the original intercept
b1 <- coef(model_hl)[2]                   # slope for value
b2 <- coef(model_hl)[3]                   # slope for capital
cat("\nModel setelah Hildreth–Lu:\n")
##
## Model setelah Hildreth–Lu:
cat("ŷ = ", round(b0, 3), " + ", round(b1, 3), "* value + ", round(b2, 3), "* capital\n")
## ŷ = -42.817 + 0.1 * value + 0.292 * capital
Since the p-value = 0.05068 > 0.05, we fail to reject H₀:
there is no sufficient evidence of positive autocorrelation remaining in the transformed model.
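As a cross-check on the grid-search result, ρ can also be approximated from the original model's Durbin-Watson statistic via ρ̂ ≈ 1 − DW/2; a small sketch (not evaluated here) using the objects defined above:
# Approximate rho from the OLS Durbin-Watson statistic (DW = 0.35819 above)
dw_ols <- dwtest(model_ols)$statistic
rho_dw <- 1 - dw_ols / 2
rho_dw   # roughly 0.82, in line with the grid-search optimum of 0.80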
# ================================================
# Heteroskedasticity test on the Hildreth–Lu model
# ================================================
library(lmtest)
bptest(model_hl) # Breusch-Pagan
##
## studentized Breusch-Pagan test
##
## data: model_hl
## BP = 20.197, df = 2, p-value = 4.115e-05
The Breusch–Pagan test on the Hildreth–Lu model rejects H₀, so heteroskedasticity is still present even though the autocorrelation has been addressed.
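Since heteroskedasticity persists, one option (a sketch only, not the remedy applied below) is to keep the Hildreth–Lu point estimates and report heteroskedasticity-consistent standard errors using the sandwich package loaded earlier:
# HC-robust inference for the Hildreth-Lu fit: coefficients unchanged,
# standard errors replaced by HC1 (White) estimates
coeftest(model_hl, vcov = vcovHC(model_hl, type = "HC1"))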
# ================================================
# Heteroskedasticity detected → apply WLS
# ================================================
fit_ols <- fitted(model_ols)
wts_hl <- 1 / (fit_ols^2)   # weights: inverse of the squared OLS fitted values
model_hl_wls <- lm(inv ~ value + capital, data = df, weights = wts_hl)
summary(model_hl_wls)
##
## Call:
## lm(formula = inv ~ value + capital, data = df, weights = wts_hl)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.2145 -0.2865 -0.0666 0.2585 4.1670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.123186 2.087840 -1.017 0.31
## value 0.084365 0.006265 13.466 <2e-16 ***
## capital 0.125311 0.013336 9.397 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5722 on 197 degrees of freedom
## Multiple R-squared: 0.5555, Adjusted R-squared: 0.551
## F-statistic: 123.1 on 2 and 197 DF, p-value: < 2.2e-16
bptest(model_hl_wls)
##
## studentized Breusch-Pagan test
##
## data: model_hl_wls
## BP = 0.00011569, df = 2, p-value = 0.9999
Breusch–Pagan test:
BP = 0.0001, p-value = 0.9999
The p-value is far above 0.05, so we fail to reject H₀
⇒ the residual variance is constant and homoskedasticity is satisfied.
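For comparison, the variance function could also be estimated from the data instead of assumed proportional to the squared fitted values; a minimal feasible-GLS sketch (an alternative to the weighting used above, illustrative names, not evaluated here):
# Feasible GLS: model log(e^2) on the predictors, then weight by 1/exp(fitted)
aux <- lm(log(resid(model_ols)^2) ~ value + capital, data = df)
w_fgls <- 1 / exp(fitted(aux))
model_fgls <- lm(inv ~ value + capital, data = df, weights = w_fgls)
bptest(model_fgls)   # re-check heteroskedasticity under the new weights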
# Normality check on the WLS residuals
resid_hl_wls <- residuals(model_hl_wls)
# Shapiro–Wilk test
shapiro.test(resid_hl_wls)
##
## Shapiro-Wilk normality test
##
## data: resid_hl_wls
## W = 0.72146, p-value < 2.2e-16
# Visualization
par(mfrow = c(1, 2))
hist(resid_hl_wls, breaks = 20, main = "Residual Histogram", xlab = "Residuals")
qqnorm(resid_hl_wls, main="Q-Q Plot Residual")
qqline(resid_hl_wls, col="red")
Since the p-value < 0.05, H₀ is rejected → the residuals are not normally distributed, which motivates the bootstrap inference that follows.
# ================================================
# Bootstrap for the WLS model following Hildreth–Lu
# ================================================
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:car':
##
## logit
# Define the statistic function (regression coefficients)
boot_fn <- function(data, indices) {
  d <- data[indices, ]   # resample rows of the data
  fit <- lm(inv ~ value + capital, data = d, weights = d$wts_hl)
  return(coef(fit))      # return the regression coefficients
}
# Attach the WLS weights as a column so they are resampled together with the rows
df$wts_hl <- wts_hl
# Run the bootstrap
set.seed(2025)
boot_res <- boot(data = df, statistic = boot_fn, R = 1000)
# Summary of the bootstrap results
print(boot_res)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = df, statistic = boot_fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -42.7143694 2.2326012272 11.780263200
## t2* 0.1155622 0.0001766903 0.007223751
## t3* 0.2306785 -0.0086874657 0.049781988
# Bootstrap confidence intervals for each coefficient
boot.ci(boot_res, type = "perc", index = 1) # intercept
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% (-60.66, -15.19 )
## Calculations and Intervals on Original Scale
boot.ci(boot_res, type = "perc", index = 2) # value
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 2)
##
## Intervals :
## Level Percentile
## 95% ( 0.1024, 0.1306 )
## Calculations and Intervals on Original Scale
boot.ci(boot_res, type = "perc", index = 3) # capital
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 3)
##
## Intervals :
## Level Percentile
## 95% ( 0.1167, 0.3126 )
## Calculations and Intervals on Original Scale
# ======================================================
# FINAL MODEL OUTPUT
# ======================================================
cat("\n--- MODEL FINAL (Setelah Handling Asumsi) ---\n")
##
## --- MODEL FINAL (Setelah Handling Asumsi) ---
print(summary(model_hl_wls))
##
## Call:
## lm(formula = inv ~ value + capital, data = df, weights = wts_hl)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.2145 -0.2865 -0.0666 0.2585 4.1670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.123186 2.087840 -1.017 0.31
## value 0.084365 0.006265 13.466 <2e-16 ***
## capital 0.125311 0.013336 9.397 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5722 on 197 degrees of freedom
## Multiple R-squared: 0.5555, Adjusted R-squared: 0.551
## F-statistic: 123.1 on 2 and 197 DF, p-value: < 2.2e-16
# ================================================
# Bootstrap Confidence Intervals for the Final Model
# ================================================
# Intercept
ci_intercept <- boot.ci(boot_res, type = "perc", index = 1)
# Coefficient of value
ci_value <- boot.ci(boot_res, type = "perc", index = 2)
# Coefficient of capital
ci_capital <- boot.ci(boot_res, type = "perc", index = 3)
cat("\n--- Bootstrap 95% CI ---\n")
##
## --- Bootstrap 95% CI ---
print(ci_intercept)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% (-60.66, -15.19 )
## Calculations and Intervals on Original Scale
print(ci_value)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 2)
##
## Intervals :
## Level Percentile
## 95% ( 0.1024, 0.1306 )
## Calculations and Intervals on Original Scale
print(ci_capital)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 3)
##
## Intervals :
## Level Percentile
## 95% ( 0.1167, 0.3126 )
## Calculations and Intervals on Original Scale
ci_table <- data.frame(
  Term = c("Intercept", "Value", "Capital"),
  Estimate = boot_res$t0,
  CI_Lower = c(ci_intercept$percent[4], ci_value$percent[4], ci_capital$percent[4]),
  CI_Upper = c(ci_intercept$percent[5], ci_value$percent[5], ci_capital$percent[5])
)
print(ci_table)
## Term Estimate CI_Lower CI_Upper
## (Intercept) Intercept -42.7143694 -60.6560217 -15.1875930
## value Value 0.1155622 0.1023890 0.1305788
## capital Capital 0.2306785 0.1166866 0.3126279