# ============================================================
# LAPORAN ANALISIS REGRESI
# ============================================================
library(ggplot2)
library(GGally)
library(broom)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ============================================================
# 1. INPUT DATA (contoh)
# ============================================================
data <- data.frame(
Provinsi = c("Aceh","Sumatera Utara","Sumatera Barat","Riau","Jambi","Sumatera Selatan",
"Bengkulu","Lampung","Kep. Bangka Belitung","Kep. Riau","DKI Jakarta",
"Jawa Barat","Jawa Tengah","DI Yogyakarta","Jawa Timur","Banten","Bali",
"Nusa Tenggara Barat","Nusa Tenggara Timur","Kalimantan Barat",
"Kalimantan Tengah","Kalimantan Selatan","Kalimantan Timur",
"Kalimantan Utara","Sulawesi Utara","Sulawesi Tengah","Sulawesi Selatan",
"Sulawesi Tenggara","Gorontalo","Papua"),
y = c(56.670469,64.505361,67.206072,65.775128,64.232642,61.508438,61.762359,
58.869913,67.361008,75.380003,143.51795,70.470327,66.20281,74.574694,
69.105442,71.531178,72.204439,61.479465,56.583729,59.343364,65.977963,
67.91521,71.383242,65.115129,65.249037,62.594164,65.092001,64.143112,
59.787524,55.311352),
x1 = c(13250,15560,16500,17600,14950,13800,14200,13550,18000,25210,29874,16900,
16050,18245,17120,18500,21367,12500,11800,14500,15200,16800,23871,
19200,16500,14100,15650,14300,13900,11000),
x2 = c(9.2,9.7,9.9,10.1,9.4,9,9.3,9.1,10.3,10.3,11.2,9.5,9.4,10.9,9.6,9.8,
10.5,8.8,8.3,9.2,9.5,9.8,9.8,9.6,9.7,8.9,9.5,9.2,8.9,7.8),
x3 = c(90,200,145,125,100,120,95,270,300,280,16100,1400,1100,1269,840,1400,
750,270,180,130,110,98,94,80,230,150,220,140,130,40)
)
cat("===== DATA AWAL =====\n")
## ===== DATA AWAL =====
print(head(data))
## Provinsi y x1 x2 x3
## 1 Aceh 56.67047 13250 9.2 90
## 2 Sumatera Utara 64.50536 15560 9.7 200
## 3 Sumatera Barat 67.20607 16500 9.9 145
## 4 Riau 65.77513 17600 10.1 125
## 5 Jambi 64.23264 14950 9.4 100
## 6 Sumatera Selatan 61.50844 13800 9.0 120
# ============================================================
# 2. EKSPLORASI DATA
# ============================================================
cat("\n===== SUMMARY STATISTIK =====\n")
##
## ===== SUMMARY STATISTIK =====
print(summary(data))
## Provinsi y x1 x2
## Length:30 Min. : 55.31 Min. :11000 Min. : 7.80
## Class :character 1st Qu.: 61.57 1st Qu.:14125 1st Qu.: 9.20
## Mode :character Median : 65.18 Median :15850 Median : 9.50
## Mean : 67.70 Mean :16667 Mean : 9.54
## 3rd Qu.: 68.81 3rd Qu.:17900 3rd Qu.: 9.80
## Max. :143.52 Max. :29874 Max. :11.20
## x3
## Min. : 40.0
## 1st Qu.: 112.5
## Median : 165.0
## Mean : 881.9
## 3rd Qu.: 295.0
## Max. :16100.0
# Scatterplot matrix
pairs(data[,c("y","x1","x2","x3")], main = "Scatterplot Matrix")

# Histogram variabel Y
ggplot(data, aes(y)) +
geom_histogram(bins = 10, fill = "steelblue") +
theme_minimal() +
labs(title = "Histogram Variabel Y")

# ============================================================
# 3. REGRESI DAN UJI ASUMSI
# ============================================================
model <- lm(y ~ x1 + x2 + x3, data = data)
cat("\n===== RINGKASAN MODEL REGRESI =====\n")
##
## ===== RINGKASAN MODEL REGRESI =====
print(summary(model))
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.077 -1.114 0.045 1.018 2.691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.478e+01 5.880e+00 4.214 0.000267 ***
## x1 8.562e-04 1.560e-04 5.490 9.27e-06 ***
## x2 2.638e+00 7.978e-01 3.307 0.002763 **
## x3 3.947e-03 1.421e-04 27.774 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.705 on 26 degrees of freedom
## Multiple R-squared: 0.9888, Adjusted R-squared: 0.9875
## F-statistic: 762.8 on 3 and 26 DF, p-value: < 2.2e-16
# QQ Plot - uji normalitas residual
qqnorm(model$residuals)
qqline(model$residuals)

# Plot Residual vs Fitted
plot(model$fitted.values, model$residuals,
xlab = "Nilai Fitted", ylab = "Residual",
main = "Residual vs Fitted")
abline(h = 0, col = "red")

# ============================================================
# 4. MANUAL OLS (β = (X'X)^(-1) X'Y)
# ============================================================
X <- as.matrix(cbind(1, data$x1, data$x2, data$x3))
Y <- as.matrix(data$y)
beta_manual <- solve(t(X) %*% X) %*% (t(X) %*% Y)
cat("\n===== KOEFISIEN MANUAL OLS =====\n")
##
## ===== KOEFISIEN MANUAL OLS =====
print(beta_manual)
## [,1]
## [1,] 2.477712e+01
## [2,] 8.562453e-04
## [3,] 2.637970e+00
## [4,] 3.947418e-03
# ============================================================
# 5. UJI HIPOTESIS
# ============================================================
cat("\n===== UJI F (ANOVA) =====\n")
##
## ===== UJI F (ANOVA) =====
print(anova(model))
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 4403.4 4403.4 1515.5799 <2e-16 ***
## x2 1 3.7 3.7 1.2858 0.2672
## x3 1 2241.3 2241.3 771.4113 <2e-16 ***
## Residuals 26 75.5 2.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n===== UJI t =====\n")
##
## ===== UJI t =====
print(summary(model)$coefficients)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.477712e+01 5.8795476880 4.214119 2.669677e-04
## x1 8.562453e-04 0.0001559636 5.490032 9.265546e-06
## x2 2.637970e+00 0.7978129338 3.306502 2.763272e-03
## x3 3.947418e-03 0.0001421249 27.774291 7.408634e-21
# ============================================================
# 6. EVALUASI MODEL
# ============================================================
cat("\n===== NILAI R2 / AIC / BIC =====\n")
##
## ===== NILAI R2 / AIC / BIC =====
print(glance(model))
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.989 0.987 1.70 763. 1.89e-25 3 -56.4 123. 130.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
cat("\n===== PREDIKSI MODEL =====\n")
##
## ===== PREDIKSI MODEL =====
print(predict(model))
## 1 2 3 4 5 6 7 8
## 60.74696 64.47809 65.59344 66.98396 62.76965 60.80872 61.84393 61.45057
## 9 10 11 12 13 14 15 16
## 68.54485 74.63943 143.45528 69.83476 67.65893 74.16246 68.07638 71.99615
## 17 18 19 20 21 22 23 24
## 73.73176 59.76012 57.48650 61.97516 63.28698 65.40099 71.43971 66.85733
## 25 26 27 28 29 30
## 65.40138 60.92022 64.10650 61.84339 60.67002 54.92988