# ============================================================
#   LAPORAN ANALISIS REGRESI 
# ============================================================

library(ggplot2)
library(GGally)
library(broom)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# ============================================================
# 1. INPUT DATA (contoh)
# ============================================================

data <- data.frame(
  Provinsi = c("Aceh","Sumatera Utara","Sumatera Barat","Riau","Jambi","Sumatera Selatan",
               "Bengkulu","Lampung","Kep. Bangka Belitung","Kep. Riau","DKI Jakarta",
               "Jawa Barat","Jawa Tengah","DI Yogyakarta","Jawa Timur","Banten","Bali",
               "Nusa Tenggara Barat","Nusa Tenggara Timur","Kalimantan Barat",
               "Kalimantan Tengah","Kalimantan Selatan","Kalimantan Timur",
               "Kalimantan Utara","Sulawesi Utara","Sulawesi Tengah","Sulawesi Selatan",
               "Sulawesi Tenggara","Gorontalo","Papua"),
  y = c(56.670469,64.505361,67.206072,65.775128,64.232642,61.508438,61.762359,
        58.869913,67.361008,75.380003,143.51795,70.470327,66.20281,74.574694,
        69.105442,71.531178,72.204439,61.479465,56.583729,59.343364,65.977963,
        67.91521,71.383242,65.115129,65.249037,62.594164,65.092001,64.143112,
        59.787524,55.311352),
  x1 = c(13250,15560,16500,17600,14950,13800,14200,13550,18000,25210,29874,16900,
         16050,18245,17120,18500,21367,12500,11800,14500,15200,16800,23871,
         19200,16500,14100,15650,14300,13900,11000),
  x2 = c(9.2,9.7,9.9,10.1,9.4,9,9.3,9.1,10.3,10.3,11.2,9.5,9.4,10.9,9.6,9.8,
         10.5,8.8,8.3,9.2,9.5,9.8,9.8,9.6,9.7,8.9,9.5,9.2,8.9,7.8),
  x3 = c(90,200,145,125,100,120,95,270,300,280,16100,1400,1100,1269,840,1400,
         750,270,180,130,110,98,94,80,230,150,220,140,130,40)
)

cat("===== DATA AWAL =====\n")
## ===== DATA AWAL =====
print(head(data))
##           Provinsi        y    x1   x2  x3
## 1             Aceh 56.67047 13250  9.2  90
## 2   Sumatera Utara 64.50536 15560  9.7 200
## 3   Sumatera Barat 67.20607 16500  9.9 145
## 4             Riau 65.77513 17600 10.1 125
## 5            Jambi 64.23264 14950  9.4 100
## 6 Sumatera Selatan 61.50844 13800  9.0 120
# ============================================================
# 2. EKSPLORASI DATA
# ============================================================

cat("\n===== SUMMARY STATISTIK =====\n")
## 
## ===== SUMMARY STATISTIK =====
print(summary(data))
##    Provinsi               y                x1              x2       
##  Length:30          Min.   : 55.31   Min.   :11000   Min.   : 7.80  
##  Class :character   1st Qu.: 61.57   1st Qu.:14125   1st Qu.: 9.20  
##  Mode  :character   Median : 65.18   Median :15850   Median : 9.50  
##                     Mean   : 67.70   Mean   :16667   Mean   : 9.54  
##                     3rd Qu.: 68.81   3rd Qu.:17900   3rd Qu.: 9.80  
##                     Max.   :143.52   Max.   :29874   Max.   :11.20  
##        x3         
##  Min.   :   40.0  
##  1st Qu.:  112.5  
##  Median :  165.0  
##  Mean   :  881.9  
##  3rd Qu.:  295.0  
##  Max.   :16100.0
# Scatterplot matrix
pairs(data[,c("y","x1","x2","x3")], main = "Scatterplot Matrix")

# Histogram variabel Y
ggplot(data, aes(y)) +
  geom_histogram(bins = 10, fill = "steelblue") +
  theme_minimal() +
  labs(title = "Histogram Variabel Y")

# ============================================================
# 3. REGRESI DAN UJI ASUMSI
# ============================================================

model <- lm(y ~ x1 + x2 + x3, data = data)

cat("\n===== RINGKASAN MODEL REGRESI =====\n")
## 
## ===== RINGKASAN MODEL REGRESI =====
print(summary(model))
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.077 -1.114  0.045  1.018  2.691 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.478e+01  5.880e+00   4.214 0.000267 ***
## x1          8.562e-04  1.560e-04   5.490 9.27e-06 ***
## x2          2.638e+00  7.978e-01   3.307 0.002763 ** 
## x3          3.947e-03  1.421e-04  27.774  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.705 on 26 degrees of freedom
## Multiple R-squared:  0.9888, Adjusted R-squared:  0.9875 
## F-statistic: 762.8 on 3 and 26 DF,  p-value: < 2.2e-16
# QQ Plot - uji normalitas residual
qqnorm(model$residuals)
qqline(model$residuals)

# Plot Residual vs Fitted
plot(model$fitted.values, model$residuals,
     xlab = "Nilai Fitted", ylab = "Residual",
     main = "Residual vs Fitted")
abline(h = 0, col = "red")

# ============================================================
# 4. MANUAL OLS (β = (X'X)^(-1) X'Y)
# ============================================================

X <- as.matrix(cbind(1, data$x1, data$x2, data$x3))
Y <- as.matrix(data$y)

beta_manual <- solve(t(X) %*% X) %*% (t(X) %*% Y)

cat("\n===== KOEFISIEN MANUAL OLS =====\n")
## 
## ===== KOEFISIEN MANUAL OLS =====
print(beta_manual)
##              [,1]
## [1,] 2.477712e+01
## [2,] 8.562453e-04
## [3,] 2.637970e+00
## [4,] 3.947418e-03
# ============================================================
# 5. UJI HIPOTESIS
# ============================================================

cat("\n===== UJI F (ANOVA) =====\n")
## 
## ===== UJI F (ANOVA) =====
print(anova(model))
## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq   F value Pr(>F)    
## x1         1 4403.4  4403.4 1515.5799 <2e-16 ***
## x2         1    3.7     3.7    1.2858 0.2672    
## x3         1 2241.3  2241.3  771.4113 <2e-16 ***
## Residuals 26   75.5     2.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n===== UJI t =====\n")
## 
## ===== UJI t =====
print(summary(model)$coefficients)
##                 Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept) 2.477712e+01 5.8795476880  4.214119 2.669677e-04
## x1          8.562453e-04 0.0001559636  5.490032 9.265546e-06
## x2          2.637970e+00 0.7978129338  3.306502 2.763272e-03
## x3          3.947418e-03 0.0001421249 27.774291 7.408634e-21
# ============================================================
# 6. EVALUASI MODEL
# ============================================================

cat("\n===== NILAI R2 / AIC / BIC =====\n")
## 
## ===== NILAI R2 / AIC / BIC =====
print(glance(model))
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.989         0.987  1.70      763. 1.89e-25     3  -56.4  123.  130.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
cat("\n===== PREDIKSI MODEL =====\n")
## 
## ===== PREDIKSI MODEL =====
print(predict(model))
##         1         2         3         4         5         6         7         8 
##  60.74696  64.47809  65.59344  66.98396  62.76965  60.80872  61.84393  61.45057 
##         9        10        11        12        13        14        15        16 
##  68.54485  74.63943 143.45528  69.83476  67.65893  74.16246  68.07638  71.99615 
##        17        18        19        20        21        22        23        24 
##  73.73176  59.76012  57.48650  61.97516  63.28698  65.40099  71.43971  66.85733 
##        25        26        27        28        29        30 
##  65.40138  60.92022  64.10650  61.84339  60.67002  54.92988