library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(Metrics)
library(readxl)

# 1. Pengumpulan Data ----

# Load the source workbook into `data` (the echoed preview shows a tibble with
# the columns Y, X1, X2, X3).
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists; consider a project-relative path.
excel_path <- "C:/Users/acer/Downloads/Persentase Penduduk Miskin (P0) Menurut Kabupaten_Kota, 2024.xlsx"
data <- read_excel(path = excel_path)
# Preview the first rows to confirm the import looks right.
head(x = data)
## # A tibble: 6 × 4
##       Y    X1    X2    X3
##   <dbl> <dbl> <dbl> <dbl>
## 1  73.4 11835  8.01  1093
## 2  71.8 11432  7.39   864
## 3  73.9 12492  7.87  1314
## 4  70.2 10964  7.34  1275
## 5  69.1 10226  6.86   915
## 6  71.4  9734  7.86  1048

# 2. Eksplorasi Data ----

# Inspect the structure: dimensions, column types and leading values.
str(object = data)
## tibble [36 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Y : num [1:36] 73.4 71.8 73.9 70.2 69.1 ...
##  $ X1: num [1:36] 11835 11432 12492 10964 10226 ...
##  $ X2: num [1:36] 8.01 7.39 7.87 7.34 6.86 7.86 8.46 6.89 7.82 8.09 ...
##  $ X3: num [1:36] 1093 864 1314 1275 915 ...
# Five-number summary plus mean for every column.
summary(object = data)
##        Y               X1              X2               X3       
##  Min.   :67.95   Min.   : 9587   Min.   : 6.400   Min.   :  461  
##  1st Qu.:71.36   1st Qu.:10858   1st Qu.: 7.378   1st Qu.:  930  
##  Median :73.72   Median :11416   Median : 7.865   Median : 1150  
##  Mean   :74.14   Mean   :12019   Mean   : 8.248   Mean   : 2039  
##  3rd Qu.:76.71   3rd Qu.:12949   3rd Qu.: 9.075   3rd Qu.: 1721  
##  Max.   :84.99   Max.   :16650   Max.   :11.240   Max.   :11277
# Pairwise correlation matrix of the response and the predictors
# (cor() defaults to Pearson correlation).
cor(x = data)
##            Y        X1        X2        X3
## Y  1.0000000 0.8940400 0.9719624 0.6717837
## X1 0.8940400 1.0000000 0.8088355 0.6787900
## X2 0.9719624 0.8088355 1.0000000 0.7230878
## X3 0.6717837 0.6787900 0.7230878 1.0000000
# Draw one boxplot per variable to screen for outliers and skew. Each plot is
# titled with its column name so the four figures can be told apart.
for (var_name in c("Y", "X1", "X2", "X3")) {
  boxplot(data[[var_name]], main = var_name)
}

# 3. Uji Asumsi Regresi ----

# Fit the multiple linear regression of Y on every other column of `data`.
model <- lm(formula = Y ~ ., data = data)

# Residual normality, visual check: normal Q-Q plot of the residuals.
plot(x = model, which = 2)

# Residual normality, formal check: Shapiro-Wilk test
# (H0: the residuals are normally distributed).
shapiro.test(x = residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.98213, p-value = 0.8148
# Homoscedasticity: scale-location plot as the visual check ...
plot(x = model, which = 3)

# ... then the studentized Breusch-Pagan test (H0: constant error variance).
bptest(formula = model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 7.2454, df = 3, p-value = 0.06447
# Autocorrelation: Durbin-Watson test. With the default settings the
# alternative is one-sided (autocorrelation greater than 0).
# NOTE(review): the statistic depends on row order; if the rows are
# cross-sectional units with no natural ordering, interpret with care.
dwtest(formula = model)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 2.1378, p-value = 0.6022
## alternative hypothesis: true autocorrelation is greater than 0
# Multicollinearity (only meaningful for multiple regression): variance
# inflation factor of each predictor.
vif(mod = model)
##       X1       X2       X3 
## 3.055358 3.453012 2.214211

# 4. Estimasi Parameter ----

# Coefficient estimates with standard errors and t tests, plus the residual
# standard error, R-squared and the overall F statistic.
summary(object = model)
## 
## Call:
## lm(formula = Y ~ ., data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.07372 -0.34571  0.00874  0.39835  1.07212 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.247e+01  8.428e-01  50.390  < 2e-16 ***
## X1           8.565e-04  8.852e-05   9.676 5.05e-11 ***
## X2           2.653e+00  1.290e-01  20.563  < 2e-16 ***
## X3          -2.464e-04  5.651e-05  -4.360 0.000126 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5306 on 32 degrees of freedom
## Multiple R-squared:  0.9864, Adjusted R-squared:  0.9852 
## F-statistic: 775.4 on 3 and 32 DF,  p-value: < 2.2e-16

# 5. Pengujian Hipotesis ----

# F tests from the ANOVA table. These are sequential: each term is tested in
# the order it entered the model (X1, then X2, then X3). The overall F test of
# the regression is the F-statistic line printed by summary(model).
anova(object = model)
## Analysis of Variance Table
## 
## Response: Y
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## X1         1 530.75  530.75 1884.901 < 2.2e-16 ***
## X2         1 118.90  118.90  422.258 < 2.2e-16 ***
## X3         1   5.35    5.35   19.009 0.0001262 ***
## Residuals 32   9.01    0.28                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 6. Evaluasi Model ----

# Goodness of fit: build the model summary once and read both R-squared values.
fit_summary <- summary(model)
fit_summary$r.squared
## [1] 0.9864301
fit_summary$adj.r.squared
## [1] 0.985158
# Residuals-vs-fitted plot to look for leftover structure in the residuals.
plot(x = model, which = 1)

# Observed response and in-sample fitted values as plain numeric vectors.
# Y is already numeric, so it is converted directly: going through
# as.character() first is a factor-conversion idiom that is unnecessary here
# and can lose precision, because doubles are rendered with at most 15
# significant digits.
y_true <- as.numeric(data$Y)
# as.numeric() drops the observation names that predict() attaches.
y_pred <- as.numeric(predict(model))

# In-sample error metrics: the model is scored on the same data it was fitted
# to, so these values are optimistic for new data.
rmse(actual = y_true, predicted = y_pred)
## [1] 0.5002935
mae(actual = y_true, predicted = y_pred)
## [1] 0.4096924
# mape() returns a proportion, not a percentage (multiply by 100 for %).
mape(actual = y_true, predicted = y_pred)
## [1] 0.005517475