Regresi Linier

Hendrad09

2023-10-23

library(rsample)
library(DataExplorer)
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(openxlsx)
library(readxl)
library(car)
## Loading required package: carData
library(ggcorrplot)
## Loading required package: ggplot2

Data

data.demand <- read_excel(path = "Data/data_01.xlsx", col_names = TRUE, sheet="sheet1")
head(data.demand)
## # A tibble: 6 x 5
##   Demand Harga Harga_Kompetitor Biaya_Iklan Income
##    <dbl> <dbl>            <dbl>       <dbl>  <dbl>
## 1   1290   137               94         814  42498
## 2   1117   147               81         896  41399
## 3   1155   149               89         852  39905
## 4   1299   117               92         854  34871
## 5   1166   135               86         810  34239
## 6   1186   143               79         768  44452
dim(data.demand)
## [1] 30  5
str(data.demand)
## tibble [30 x 5] (S3: tbl_df/tbl/data.frame)
##  $ Demand          : num [1:30] 1290 1117 1155 1299 1166 ...
##  $ Harga           : num [1:30] 137 147 149 117 135 143 113 111 109 129 ...
##  $ Harga_Kompetitor: num [1:30] 94 81 89 92 86 79 91 82 81 82 ...
##  $ Biaya_Iklan     : num [1:30] 814 896 852 854 810 768 978 821 843 849 ...
##  $ Income          : num [1:30] 42498 41399 39905 34871 34239 ...
skimr::skim(data.demand)
Data summary
Name data.demand
Number of rows 30
Number of columns 5
_______________________
Column type frequency:
numeric 5
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Demand 0 1 1283.80 107.49 1089 1206.00 1293.0 1330.25 1515 ▃▃▇▁▂
Harga 0 1 127.00 14.32 103 116.25 125.5 138.00 149 ▆▇▆▇▇
Harga_Kompetitor 0 1 87.33 7.23 76 82.00 87.5 91.75 100 ▅▇▇▃▆
Biaya_Iklan 0 1 870.17 69.51 768 818.00 853.0 913.75 1000 ▆▇▃▃▃
Income 0 1 37430.03 4898.18 30247 33442.50 37543.5 41349.50 44671 ▇▆▆▇▇

Data Exploration

scatterplot(Demand ~ Harga, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Harga_Kompetitor, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Biaya_Iklan, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Income, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

car::scatterplotMatrix(~Demand+Harga+Harga_Kompetitor + Biaya_Iklan+ Income, data=data.demand, reg.line=FALSE)

Korelasi

korelasi <- cor(data.demand)
ggcorrplot(korelasi, type="lower", lab = TRUE)

model <- lm(Demand ~. , data.demand)
vif(model)
##            Harga Harga_Kompetitor      Biaya_Iklan           Income 
##         1.014255         1.252316         1.249578         1.054299

Memeriksa Sebaran Data

plot_histogram(data = data.demand,nrow=3,ncol = 3,
               geom_histogram_args = list(fill="steelblue"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Model Regresi Linear

regresi <- lm(formula = Demand~., data = data.demand)
summary(regresi)
## 
## Call:
## lm(formula = Demand ~ ., data = data.demand)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -79.080 -25.163  -3.329  29.869  69.172 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      845.017716 151.749505   5.569 8.63e-06 ***
## Harga             -5.227902   0.502860 -10.396 1.45e-10 ***
## Harga_Kompetitor   5.157661   1.107094   4.659 9.02e-05 ***
## Biaya_Iklan        0.295970   0.114984   2.574   0.0164 *  
## Income             0.010546   0.001499   7.037 2.24e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.5 on 25 degrees of freedom
## Multiple R-squared:  0.8894, Adjusted R-squared:  0.8717 
## F-statistic: 50.26 on 4 and 25 DF,  p-value: 1.351e-11
plot_model(regresi,type = "est",sort.est = TRUE,
           transform = "exp" )

plot_model(model = regresi,type="pred")
## $Harga

## 
## $Harga_Kompetitor

## 
## $Biaya_Iklan

## 
## $Income

Model Checking

plot_model(regresi,type = "diag")
## [[1]]

## 
## [[2]]
## `geom_smooth()` using formula 'y ~ x'

## 
## [[3]]

## 
## [[4]]
## `geom_smooth()` using formula 'y ~ x'

res <- residuals(regresi)
# uji normalitas
shapiro.test(res)
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.98039, p-value = 0.8359
fBasics::jarqueberaTest(res)
## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 0.4725
##   P VALUE:
##     Asymptotic p Value: 0.7896 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra
fBasics::ksnormTest(res,)
## 
## Title:
##  One-sample Kolmogorov-Smirnov test
## 
## Test Results:
##   STATISTIC:
##     D: 0.5
##   P VALUE:
##     Alternative Two-Sided: 1.847e-07 
##     Alternative      Less: 8.951e-07 
##     Alternative   Greater: 9.234e-08 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra
print(fBasics::adTest(res))
## 
## Title:
##  Anderson - Darling Normality Test
## 
## Test Results:
##   STATISTIC:
##     A: 0.2566
##   P VALUE:
##     0.6991 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra

Prediksi Regresi Linear

Membagi data menjadi training testing

set.seed(123)
data_split <- initial_split(data = data.demand,prop = 0.8)
train1 <- training(data_split)
test1 <- testing(data_split)
regresi2 <- lm(Demand ~.,data = train1)

Prediksi data testing

prediksi <- predict(regresi2,newdata = test1)
head(prediksi)
##        1        2        3        4        5        6 
## 1193.161 1203.997 1335.197 1341.295 1002.570 1318.758

Evaluasi hasil prediksi

# RMSE
mlr3measures::rmse(response = prediksi,truth = test1$Demand)
## [1] 60.69091
# MAPE
mlr3measures::mape(response = prediksi,truth = test1$Demand)
## [1] 0.04504478
# Spearman Correlation
mlr3measures::srho(response = prediksi,truth = test1$Demand)
## [1] 0.9428571