# Analisis Regresi Linier Berganda
Analisis regresi linier berganda digunakan untuk mengetahui hubungan antara satu variabel dependen (Y) dengan lebih dari satu variabel independen (X). Pada analisis ini akan dilakukan tahapan pengumpulan data, eksplorasi data, uji asumsi klasik, estimasi parameter regresi, pengujian hipotesis, serta evaluasi model regresi.
# Load the data set from the system clipboard (tab-delimited, with a header).
# NOTE(review): clipboard input is platform-dependent and cannot be reproduced
# from this document alone; consider reading from a saved file instead.
data <- read.delim("clipboard")

# Stop early when an expected column is absent. `data$col` returns NULL for a
# missing column and as.numeric(NULL) is numeric(0), so without this check the
# problem would only surface later as a confusing length mismatch.
required_cols <- c("Y", "X1", "X2", "X3")
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Missing column(s) in input data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Numeric copies of the response (Y) and the predictors (X1-X3). These vectors
# feed the boxplots and the matrix-based computations further down.
# `[[` is used for exact (non-partial) column-name matching.
Y <- as.numeric(data[["Y"]])
X1 <- as.numeric(data[["X1"]])
X2 <- as.numeric(data[["X2"]])
X3 <- as.numeric(data[["X3"]])
knitr::opts_chunk$set(echo = TRUE)
### Statistik Deskriptif
# Per-column descriptive statistics of `data` (minimum, quartiles, median,
# mean and maximum for the numeric columns).
summary(data)
## Y X1 X2 X3
## Min. :61.94 Min. : 129014 Min. : 4.550 Min. : 6.63
## 1st Qu.:68.19 1st Qu.: 682758 1st Qu.: 6.853 1st Qu.: 70.48
## Median :71.14 Median :1012978 Median : 7.460 Median :112.62
## Mean :71.69 Mean :1044701 Mean : 7.798 Mean :108.22
## 3rd Qu.:74.82 3rd Qu.:1234546 3rd Qu.: 8.967 3rd Qu.:153.13
## Max. :82.22 Max. :2896195 Max. :11.130 Max. :246.60
knitr::opts_chunk$set(echo = TRUE)
# Draw one boxplot per variable, side by side in a single row of four panels,
# then switch back to a single-panel layout.
par(mfrow = c(1, 4))
invisible(Map(
  function(values, label) boxplot(values, main = paste("Boxplot", label)),
  list(Y, X1, X2, X3),
  c("Y", "X1", "X2", "X3")
))
par(mfrow = c(1, 1))
knitr::opts_chunk$set(echo = TRUE)
# Fit the multiple linear regression of Y on X1, X2 and X3 by ordinary least
# squares, with the variables taken from `data`.
model <- lm(Y ~ X1 + X2 + X3, data = data)
knitr::opts_chunk$set(echo = TRUE)
# Extract the residuals of the fitted model and test them for normality with
# the Shapiro-Wilk test. The variable name `res` is echoed in the "data:" line
# of the printed test result.
res <- residuals(model)
shapiro.test(res)
##
## Shapiro-Wilk normality test
##
## data: res
## W = 0.97949, p-value = 0.6996
# Normal Q-Q plot of the residuals with a reference line, as a visual
# companion to the normality test.
qqnorm(res)
qqline(res)
knitr::opts_chunk$set(echo = TRUE)
# Scatter plot of residuals against fitted values; the dashed horizontal line
# marks a residual of zero.
plot(
  x = fitted(model),
  y = res,
  main = "Plot Residual vs Fitted",
  xlab = "Nilai Fitted",
  ylab = "Residual"
)
abline(h = 0, lty = "dashed")
knitr::opts_chunk$set(echo = TRUE)
library(car)
## Loading required package: carData
# Variance inflation factor of each predictor (from the car package), used to
# screen the model for multicollinearity.
vif(model)
## X1 X2 X3
## 3.440511 2.770464 5.752751
knitr::opts_chunk$set(echo = TRUE)
# Manual OLS estimation through the normal equations (X'X) b = X'Y.
# Design matrix: a leading column of ones for the intercept, then X1-X3.
X <- cbind(1, X1, X2, X3)
n <- length(Y)    # number of observations
p <- ncol(X) - 1  # number of predictors (intercept column excluded)

# Solve the linear system directly instead of forming the explicit inverse of
# X'X and multiplying by X'Y. The predictors are on very different scales, so
# avoiding the explicit inverse is the numerically safer route.
# crossprod(X) is X'X and crossprod(X, Y) is X'Y; the result is a 4 x 1 matrix.
beta_hat <- solve(crossprod(X), crossprod(X, Y))
rownames(beta_hat) <- c("Intercept", "X1", "X2", "X3")
beta_hat
## [,1]
## Intercept 4.745655e+01
## X1 7.688381e-07
## X2 3.058714e+00
## X3 -3.931958e-03
knitr::opts_chunk$set(echo = TRUE)
# Overall F test of the regression, built from the sums of squares.

# Fitted values and residuals from the matrix-based coefficient estimates.
Y_hat <- X %*% beta_hat
residual <- Y - Y_hat

# Error degrees of freedom: observations minus estimated coefficients.
df_error <- n - p - 1

# Error and regression sums of squares.
Y_bar <- mean(Y)
SSE <- drop(t(residual) %*% residual)
SSR <- sum((Y_hat - Y_bar)^2)

# Mean squares and the resulting F statistic with its upper-tail p-value.
MSR <- SSR / p
MSE <- SSE / df_error
F_hitung <- MSR / MSE
p_value_F <- pf(F_hitung, df1 = p, df2 = df_error, lower.tail = FALSE)
c(F_hitung = F_hitung, p_value = p_value_F)
## F_hitung p_value
## 3.085429e+02 1.021106e-24
knitr::opts_chunk$set(echo = TRUE)
# Partial t tests for the individual coefficients.

# Estimated covariance matrix of the coefficients: MSE times the inverse of
# X'X; its diagonal holds the coefficient variances.
var_beta <- solve(t(X) %*% X) * MSE
SE_beta <- sqrt(diag(var_beta))

# t statistic per coefficient and its two-sided p-value, taken as twice the
# upper-tail probability of |t| with the error degrees of freedom.
t_hitung <- as.vector(beta_hat) / SE_beta
p_value_t <- 2 * pt(abs(t_hitung), df = df_error, lower.tail = FALSE)

# Collect the results in one table.
hasil_uji_t <- data.frame(
  Koefisien = rownames(beta_hat),
  Estimasi = as.vector(beta_hat),
  StdError = SE_beta,
  t_hitung = t_hitung,
  p_value = p_value_t
)
hasil_uji_t
## Koefisien Estimasi StdError t_hitung p_value
## Intercept 4.745655e+01 1.584621e+00 29.9482011 5.063640e-26
## X1 X1 7.688381e-07 4.606860e-07 1.6688982 1.043220e-01
## X2 X2 3.058714e+00 1.710021e-01 17.8869932 7.192964e-19
## X3 X3 -3.931958e-03 5.986305e-03 -0.6568255 5.157147e-01
knitr::opts_chunk$set(echo = TRUE)
# Goodness of fit: R-squared is the share of the total sum of squares that the
# regression explains; the adjusted version compares the error mean square
# (MSE, i.e. SSE / df_error) with the total mean square.
SST <- sum((Y - Y_bar)^2)
R2 <- SSR / SST
Adj_R2 <- 1 - MSE / (SST / (n - 1))
c(R_squared = R2, Adjusted_R_squared = Adj_R2)
## R_squared Adjusted_R_squared
## 0.9645696 0.9614434
knitr::opts_chunk$set(echo = TRUE)
# Cross-check: the built-in model summary should reproduce the manually
# computed coefficient table, F statistic and R-squared values.
summary(model)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69264 -0.71255 -0.03147 0.60234 2.22607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.746e+01 1.585e+00 29.948 <2e-16 ***
## X1 7.688e-07 4.607e-07 1.669 0.104
## X2 3.059e+00 1.710e-01 17.887 <2e-16 ***
## X3 -3.932e-03 5.986e-03 -0.657 0.516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.01 on 34 degrees of freedom
## Multiple R-squared: 0.9646, Adjusted R-squared: 0.9614
## F-statistic: 308.5 on 3 and 34 DF, p-value: < 2.2e-16
knitr::opts_chunk$set(echo = TRUE)