library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(Metrics)
library(readxl)
1. Pengumpulan Data
# Read the Excel workbook into `data` and preview its first rows.
# NOTE(review): the path is absolute and machine-specific — confirm the file
# location before running this on another computer.
data <- read_excel(
  path = "C:/Users/acer/Downloads/Persentase Penduduk Miskin (P0) Menurut Kabupaten_Kota, 2024.xlsx"
)
head(x = data)
## # A tibble: 6 × 4
## Y X1 X2 X3
## <dbl> <dbl> <dbl> <dbl>
## 1 73.4 11835 8.01 1093
## 2 71.8 11432 7.39 864
## 3 73.9 12492 7.87 1314
## 4 70.2 10964 7.34 1275
## 5 69.1 10226 6.86 915
## 6 71.4 9734 7.86 1048
2. Eksplorasi Data
# Show the structure of `data`: dimensions, column names and column types.
str(object = data)
## tibble [36 × 4] (S3: tbl_df/tbl/data.frame)
## $ Y : num [1:36] 73.4 71.8 73.9 70.2 69.1 ...
## $ X1: num [1:36] 11835 11432 12492 10964 10226 ...
## $ X2: num [1:36] 8.01 7.39 7.87 7.34 6.86 7.86 8.46 6.89 7.82 8.09 ...
## $ X3: num [1:36] 1093 864 1314 1275 915 ...
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(object = data)
## Y X1 X2 X3
## Min. :67.95 Min. : 9587 Min. : 6.400 Min. : 461
## 1st Qu.:71.36 1st Qu.:10858 1st Qu.: 7.378 1st Qu.: 930
## Median :73.72 Median :11416 Median : 7.865 Median : 1150
## Mean :74.14 Mean :12019 Mean : 8.248 Mean : 2039
## 3rd Qu.:76.71 3rd Qu.:12949 3rd Qu.: 9.075 3rd Qu.: 1721
## Max. :84.99 Max. :16650 Max. :11.240 Max. :11277
# Pairwise correlation matrix of all columns (cor() defaults).
cor(x = data)
## Y X1 X2 X3
## Y 1.0000000 0.8940400 0.9719624 0.6717837
## X1 0.8940400 1.0000000 0.8088355 0.6787900
## X2 0.9719624 0.8088355 1.0000000 0.7230878
## X3 0.6717837 0.6787900 0.7230878 1.0000000
# Draw one boxplot per variable to inspect its distribution. Each plot is
# titled with its column name so the four figures can be told apart.
for (var_name in c("Y", "X1", "X2", "X3")) {
  boxplot(data[[var_name]], main = var_name)
}

3. Uji Asumsi Regresi
# Fit a multiple linear regression of Y on all other columns of `data`.
model <- lm(Y ~ ., data = data)
# Residual normality: normal Q-Q plot (which = 2) followed by the
# Shapiro-Wilk test on the model residuals.
plot(model, which = 2)

shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.98213, p-value = 0.8148
# Homoskedasticity: scale-location plot (which = 3) followed by the
# studentized Breusch-Pagan test (lmtest::bptest).
plot(model, which = 3)

bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 7.2454, df = 3, p-value = 0.06447
# Autocorrelation: Durbin-Watson test (lmtest::dwtest); as called here it
# tests the one-sided alternative of positive autocorrelation.
# NOTE(review): the file name suggests rows are regencies/cities for a single
# year, not a time series — the statistic depends on row order, so confirm
# this test is meaningful for the data.
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 2.1378, p-value = 0.6022
## alternative hypothesis: true autocorrelation is greater than 0
# Multicollinearity (relevant for multiple regression): variance inflation
# factor of each predictor (car::vif).
vif(model)
## X1 X2 X3
## 3.055358 3.453012 2.214211
4. Estimasi Parameter
# Coefficient estimates, standard errors, t tests, R-squared and the overall
# F statistic of the fitted model.
summary(object = model)
##
## Call:
## lm(formula = Y ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.07372 -0.34571 0.00874 0.39835 1.07212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.247e+01 8.428e-01 50.390 < 2e-16 ***
## X1 8.565e-04 8.852e-05 9.676 5.05e-11 ***
## X2 2.653e+00 1.290e-01 20.563 < 2e-16 ***
## X3 -2.464e-04 5.651e-05 -4.360 0.000126 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5306 on 32 degrees of freedom
## Multiple R-squared: 0.9864, Adjusted R-squared: 0.9852
## F-statistic: 775.4 on 3 and 32 DF, p-value: < 2.2e-16
5. Pengujian Hipotesis
# Sequential (Type I) F tests: each term is tested after the terms listed
# before it, so the rows depend on the order of the predictors. The overall
# model F test is the F-statistic reported by summary(model).
anova(model)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 530.75 530.75 1884.901 < 2.2e-16 ***
## X2 1 118.90 118.90 422.258 < 2.2e-16 ***
## X3 1 5.35 5.35 19.009 0.0001262 ***
## Residuals 32 9.01 0.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6. Evaluasi Model
# Coefficient of determination (R-squared) of the fitted model.
summary(model)[["r.squared"]]
## [1] 0.9864301
# Adjusted R-squared of the fitted model.
summary(model)[["adj.r.squared"]]
## [1] 0.985158
# Residuals-vs-fitted plot (which = 1) for a visual check of the fit.
plot(model, which = 1)

# In-sample accuracy: compare the observed response with the model's
# predictions for the rows it was fitted on (predict() without `newdata`).
# Y is already a numeric column, so it is used directly; a round trip through
# as.character() is unnecessary and can truncate doubles to 15 significant
# digits.
y_true <- data$Y
# unname() drops the names predict() attaches, leaving a plain numeric vector.
y_pred <- unname(predict(model))
# Root mean squared error (Metrics::rmse).
rmse(y_true, y_pred)
## [1] 0.5002935
# Mean absolute error between observed and predicted Y (Metrics::mae).
mae(actual = y_true, predicted = y_pred)
## [1] 0.4096924
# Mean absolute percentage error (Metrics::mape). The value is a fraction,
# not a percentage: multiply by 100 to report it in percent.
mape(actual = y_true, predicted = y_pred)
## [1] 0.005517475