library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
library(Matrix)
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.5.2
1. Pengumpulan data
# Read the regression data set from the Excel workbook and preview its
# first six rows.
data <- read_excel(path = "Data regresi vhallina.xlsx")
head(data, n = 6L)
## # A tibble: 6 × 4
## Y X1 X2 X3
## <dbl> <dbl> <dbl> <dbl>
## 1 12 4.4 3 27.2
## 2 5.3 2.4 4 28.1
## 3 5.6 2 5 28.1
## 4 22.9 7.6 3 27.3
## 5 4.7 2.2 4 28
## 6 17.1 5.3 3 26.4
2. Eksplorasi Data
# Inspect the structure of the data: its dimensions and the type of each
# column.
str(data)
## tibble [30 × 4] (S3: tbl_df/tbl/data.frame)
## $ Y : num [1:30] 12 5.3 5.6 22.9 4.7 17.1 17.4 20 17.2 2.8 ...
## $ X1: num [1:30] 4.4 2.4 2 7.6 2.2 5.3 5.6 5.6 3.9 1.3 ...
## $ X2: num [1:30] 3 4 5 3 4 3 4 6 6 4 ...
## $ X3: num [1:30] 27.2 28.1 28.1 27.3 28 26.4 27.7 28.3 26.8 27.4 ...
# Minimum, quartiles, median, mean and maximum of every column.
summary(data)
## Y X1 X2 X3
## Min. : 0.50 Min. :0.300 Min. :3.000 Min. :24.90
## 1st Qu.: 5.30 1st Qu.:2.250 1st Qu.:4.000 1st Qu.:26.82
## Median :13.55 Median :4.150 Median :4.000 Median :27.70
## Mean :13.72 Mean :4.273 Mean :4.333 Mean :27.53
## 3rd Qu.:21.38 3rd Qu.:6.050 3rd Qu.:5.000 3rd Qu.:28.18
## Max. :35.10 Max. :9.300 Max. :6.000 Max. :29.80
# Pairwise correlation matrix of the response Y and the predictors X1-X3.
cor(data)
## Y X1 X2 X3
## Y 1.0000000 0.97645350 0.07738010 -0.2727101
## X1 0.9764535 1.00000000 -0.02162416 -0.1765364
## X2 0.0773801 -0.02162416 1.00000000 0.4994986
## X3 -0.2727101 -0.17653640 0.49949859 1.0000000
# Draw one box plot per variable, in column order, to screen each
# distribution for outliers.
invisible(lapply(data[c("Y", "X1", "X2", "X3")], boxplot))

3. Uji Asumsi Regresi
# Fit the multiple linear regression of Y on every other column of `data`.
model <- lm(Y ~ ., data = data)
# Residual normality: normal Q-Q plot followed by the Shapiro-Wilk test.
plot(model, which = 2)

# NOTE(review): the p-value printed below is far below 0.05, so normality of
# the residuals is rejected -- check for outlying observations before
# relying on the t/F inference from this model.
shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.50277, p-value = 5.733e-09
# Homoscedasticity: scale-location plot followed by the studentized
# Breusch-Pagan test.
plot(model, which = 3)

bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 3.2951, df = 3, p-value = 0.3483
# Autocorrelation: Durbin-Watson test on the model residuals.
# NOTE(review): as the output below states, the default alternative is
# one-sided (autocorrelation greater than 0); consider
# alternative = "two.sided" if negative autocorrelation also matters.
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 2.1967, p-value = 0.6739
## alternative hypothesis: true autocorrelation is greater than 0
# Multicollinearity: variance inflation factor of each predictor.
vif(model)
## X1 X2 X3
## 1.038494 1.340610 1.383087
4. Estimasi parameter
# Coefficient estimates with standard errors and t-tests, plus the overall
# fit statistics (residual standard error, R-squared, F-test).
summary(model)
##
## Call:
## lm(formula = Y ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5626 -0.3296 -0.1374 0.0886 3.6775
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.10249 4.37783 10.07 1.82e-10 ***
## X1 3.43620 0.05658 60.73 < 2e-16 ***
## X2 1.92568 0.16939 11.37 1.38e-11 ***
## X3 -1.93999 0.16874 -11.50 1.08e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7833 on 26 degrees of freedom
## Multiple R-squared: 0.9939, Adjusted R-squared: 0.9932
## F-statistic: 1423 on 3 and 26 DF, p-value: < 2.2e-16
5. Pengujian hipotesis
# ANOVA table for the fitted model.
# NOTE(review): the rows appear to be sequential (terms added in formula
# order) -- the X2 F value differs from the squared t value reported by
# summary(model), while the last term's matches. Confirm this ordering is
# what the hypothesis test is meant to use.
anova(model)
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 2512.27 2512.27 4094.51 < 2.2e-16 ***
## X2 1 25.57 25.57 41.68 7.673e-07 ***
## X3 1 81.10 81.10 132.17 1.077e-11 ***
## Residuals 26 15.95 0.61
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6. Evaluasi model
# Coefficient of determination of the fitted model.
summary(model)[["r.squared"]]
## [1] 0.9939455
# The same measure, adjusted for the number of predictors.
summary(model)[["adj.r.squared"]]
## [1] 0.993247
# Residuals-vs-fitted plot: visual check for remaining structure in the
# residuals.
plot(model, which = 1)

# In-sample accuracy: compare the fitted values with the observed response.
# Y is already a numeric column, so it is used directly; a round trip
# through character is only needed for factors and can perturb doubles.
y_true <- data$Y
# unname() drops the observation names that predict() attaches, leaving a
# plain numeric vector.
y_pred <- unname(predict(model))
# Guard against silent recycling: if lm() dropped rows (e.g. because of
# missing values), predict(model) would be shorter than data$Y.
stopifnot(is.numeric(y_true), length(y_true) == length(y_pred))
# Root mean squared error, in the units of Y.
rmse(y_true, y_pred)
## [1] 0.7292197
# Mean absolute error, in the units of Y.
mae(y_true, y_pred)
## [1] 0.3572122
# Mean absolute percentage error; the value printed below is a proportion
# of the observed Y, not a figure already multiplied by 100.
mape(y_true, y_pred)
## [1] 0.1258845