library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
library(Matrix)
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
library(Metrics)    
## Warning: package 'Metrics' was built under R version 4.5.2
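The "built under R version" warnings above are informational only; all packages load correctly. If a cleaner report is preferred, the startup messages and warnings can be silenced, as in this optional sketch (not part of the original chunk):

# Optional: load the packages quietly so the report does not show startup noise
suppressWarnings(suppressPackageStartupMessages({
  library(lmtest)
  library(car)
  library(Matrix)
  library(readxl)
  library(Metrics)
}))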

1. Data Collection

data <- read_excel("Data regresi vhallina.xlsx")
head(data)
## # A tibble: 6 × 4
##       Y    X1    X2    X3
##   <dbl> <dbl> <dbl> <dbl>
## 1  12     4.4     3  27.2
## 2   5.3   2.4     4  28.1
## 3   5.6   2       5  28.1
## 4  22.9   7.6     3  27.3
## 5   4.7   2.2     4  28  
## 6  17.1   5.3     3  26.4

2. Data Exploration

str(data)
## tibble [30 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Y : num [1:30] 12 5.3 5.6 22.9 4.7 17.1 17.4 20 17.2 2.8 ...
##  $ X1: num [1:30] 4.4 2.4 2 7.6 2.2 5.3 5.6 5.6 3.9 1.3 ...
##  $ X2: num [1:30] 3 4 5 3 4 3 4 6 6 4 ...
##  $ X3: num [1:30] 27.2 28.1 28.1 27.3 28 26.4 27.7 28.3 26.8 27.4 ...
summary(data)
##        Y               X1              X2              X3       
##  Min.   : 0.50   Min.   :0.300   Min.   :3.000   Min.   :24.90  
##  1st Qu.: 5.30   1st Qu.:2.250   1st Qu.:4.000   1st Qu.:26.82  
##  Median :13.55   Median :4.150   Median :4.000   Median :27.70  
##  Mean   :13.72   Mean   :4.273   Mean   :4.333   Mean   :27.53  
##  3rd Qu.:21.38   3rd Qu.:6.050   3rd Qu.:5.000   3rd Qu.:28.18  
##  Max.   :35.10   Max.   :9.300   Max.   :6.000   Max.   :29.80
cor(data)
##             Y          X1          X2         X3
## Y   1.0000000  0.97645350  0.07738010 -0.2727101
## X1  0.9764535  1.00000000 -0.02162416 -0.1765364
## X2  0.0773801 -0.02162416  1.00000000  0.4994986
## X3 -0.2727101 -0.17653640  0.49949859  1.0000000
boxplot(data$Y)    # distribution and potential outliers of the response Y

boxplot(data$X1)   # predictor X1

boxplot(data$X2)   # predictor X2

boxplot(data$X3)   # predictor X3
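As an optional complement to the individual boxplots (not in the original analysis), a scatterplot matrix shows all pairwise relationships at once and mirrors the correlation matrix above:

# Pairwise scatterplots of Y, X1, X2, and X3 (base R)
pairs(data)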

3. Regression Assumption Tests

model <- lm(Y ~ ., data = data)   # multiple linear regression of Y on X1, X2, X3

# Normality of residuals
plot(model, which = 2)   # Normal Q-Q plot

shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.50277, p-value = 5.733e-09
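The Shapiro-Wilk p-value (5.7e-09) is far below 0.05, so the residuals depart from normality. As an optional follow-up (not in the original chunk), car::qqPlot adds a confidence envelope to the Q-Q plot and flags the most extreme residuals:

# Q-Q plot of studentized residuals with a 95% confidence envelope
qqPlot(model)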
# Homoscedasticity
plot(model, which = 3)   # Scale-Location plot

bptest(model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 3.2951, df = 3, p-value = 0.3483
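The Breusch-Pagan p-value (0.35) gives no evidence of heteroscedasticity. As an optional cross-check (not in the original chunk), car::ncvTest runs the score test for non-constant error variance on the same model:

# Score test for non-constant error variance; a large p-value supports homoscedasticity
ncvTest(model)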
# Autocorrelation
dwtest(model)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 2.1967, p-value = 0.6739
## alternative hypothesis: true autocorrelation is greater than 0
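The Durbin-Watson statistic is close to 2, so there is no sign of positive autocorrelation. If higher-order serial correlation were a concern, lmtest also provides the Breusch-Godfrey test; this is an optional sketch, not part of the original analysis:

# Breusch-Godfrey test for serial correlation up to order 2
bgtest(model, order = 2)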
# Multicollinearity
vif(model)
##       X1       X2       X3 
## 1.038494 1.340610 1.383087
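All VIFs are close to 1, well below the usual cutoff of 10, so multicollinearity is not a problem. To illustrate what vif() computes, the sketch below (not in the original chunk) rebuilds the VIF for X1 from its auxiliary regression, VIF_j = 1 / (1 - R²_j):

# VIF for X1 by hand: regress X1 on the other predictors and invert 1 - R^2
r2_x1 <- summary(lm(X1 ~ X2 + X3, data = data))$r.squared
1 / (1 - r2_x1)   # should reproduce vif(model)["X1"] = 1.038494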

4. Parameter Estimation

summary(model)
## 
## Call:
## lm(formula = Y ~ ., data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5626 -0.3296 -0.1374  0.0886  3.6775 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 44.10249    4.37783   10.07 1.82e-10 ***
## X1           3.43620    0.05658   60.73  < 2e-16 ***
## X2           1.92568    0.16939   11.37 1.38e-11 ***
## X3          -1.93999    0.16874  -11.50 1.08e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7833 on 26 degrees of freedom
## Multiple R-squared:  0.9939, Adjusted R-squared:  0.9932 
## F-statistic:  1423 on 3 and 26 DF,  p-value: < 2.2e-16
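All coefficients are highly significant, giving the fitted equation Y-hat = 44.10 + 3.44*X1 + 1.93*X2 - 1.94*X3. Confidence intervals for the coefficients can be obtained with base R (optional, not in the original chunk):

# 95% confidence intervals for the estimated coefficients
confint(model, level = 0.95)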

5. Hypothesis Testing

anova(model)
## Analysis of Variance Table
## 
## Response: Y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## X1         1 2512.27 2512.27 4094.51 < 2.2e-16 ***
## X2         1   25.57   25.57   41.68 7.673e-07 ***
## X3         1   81.10   81.10  132.17 1.077e-11 ***
## Residuals 26   15.95    0.61                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
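anova() reports sequential (Type I) sums of squares, so each row depends on the order in which the predictors enter the model. As an optional comparison (not in the original chunk), car::Anova gives partial (Type II) sums of squares, whose F tests correspond to the t-tests in summary(model):

# Partial (Type II) sums of squares; the order of the predictors no longer matters
Anova(model, type = 2)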

6. Model Evaluation

summary(model)$r.squared
## [1] 0.9939455
summary(model)$adj.r.squared
## [1] 0.993247
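As a check on the reported values (optional, not part of the original chunk), adjusted R² can be recomputed from R² using the usual sample-size and predictor-count correction:

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), with n = 30 and p = 3
r2 <- summary(model)$r.squared
n  <- nrow(data)
p  <- length(coef(model)) - 1
1 - (1 - r2) * (n - 1) / (n - p - 1)   # should equal 0.993247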
plot(model, which = 1)   # Residuals vs Fitted plot

y_pred <- predict(model)   # fitted values from the regression model
y_true <- data$Y           # observed values of the response

# Coerce both vectors to plain numeric (Y is already numeric, so this is only defensive)
y_true <- as.numeric(as.character(y_true))
y_pred <- as.numeric(y_pred)

rmse(y_true, y_pred)
## [1] 0.7292197
mae(y_true, y_pred)
## [1] 0.3572122
mape(y_true, y_pred)
## [1] 0.1258845
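For transparency (optional, not in the original chunk), the three error metrics can be reproduced directly from their definitions; note that Metrics::mape returns a proportion, so 0.126 corresponds to about 12.6%:

# Manual cross-check of the Metrics package results
sqrt(mean((y_true - y_pred)^2))          # RMSE
mean(abs(y_true - y_pred))               # MAE
mean(abs((y_true - y_pred) / y_true))    # MAPE, as a proportion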