Regresi Linier

Hendrad09

2023-10-23

library(rsample)
library(DataExplorer)
library(sjPlot)

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(openxlsx)
library(readxl)
library(car)

## Loading required package: carData

library(ggcorrplot)

## Loading required package: ggplot2

Data

data.demand <- read_excel(path = "Data/data_01.xlsx", col_names = TRUE, sheet="sheet1")

head(data.demand)

## # A tibble: 6 x 5
##   Demand Harga Harga_Kompetitor Biaya_Iklan Income
##    <dbl> <dbl>            <dbl>       <dbl>  <dbl>
## 1   1290   137               94         814  42498
## 2   1117   147               81         896  41399
## 3   1155   149               89         852  39905
## 4   1299   117               92         854  34871
## 5   1166   135               86         810  34239
## 6   1186   143               79         768  44452

dim(data.demand)

## [1] 30  5

str(data.demand)

## tibble [30 x 5] (S3: tbl_df/tbl/data.frame)
##  $ Demand          : num [1:30] 1290 1117 1155 1299 1166 ...
##  $ Harga           : num [1:30] 137 147 149 117 135 143 113 111 109 129 ...
##  $ Harga_Kompetitor: num [1:30] 94 81 89 92 86 79 91 82 81 82 ...
##  $ Biaya_Iklan     : num [1:30] 814 896 852 854 810 768 978 821 843 849 ...
##  $ Income          : num [1:30] 42498 41399 39905 34871 34239 ...

skimr::skim(data.demand)

Data summary
Name	data.demand
Number of rows	30
Number of columns	5
_______________________
Column type frequency:
numeric	5
________________________
Group variables	None

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Demand	1	1283.80	107.49	1089	1206.00	1293.0	1330.25	1515	▃▃▇▁▂
Harga	1	127.00	14.32	103	116.25	125.5	138.00	149	▆▇▆▇▇
Harga_Kompetitor	1	87.33	7.23	76	82.00	87.5	91.75	100	▅▇▇▃▆
Biaya_Iklan	1	870.17	69.51	768	818.00	853.0	913.75	1000	▆▇▃▃▃
Income	1	37430.03	4898.18	30247	33442.50	37543.5	41349.50	44671	▇▆▆▇▇

Data Exploration

scatterplot(Demand ~ Harga, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Harga_Kompetitor, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Biaya_Iklan, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

scatterplot(Demand ~ Income, data=data.demand, xlab="label x",   ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)

car::scatterplotMatrix(~Demand+Harga+Harga_Kompetitor + Biaya_Iklan+ Income, data=data.demand, reg.line=FALSE)

Korelasi

korelasi <- cor(data.demand)
ggcorrplot(korelasi, type="lower", lab = TRUE)

model <- lm(Demand ~. , data.demand)
vif(model)

##            Harga Harga_Kompetitor      Biaya_Iklan           Income 
##         1.014255         1.252316         1.249578         1.054299

Memeriksa Sebaran Data

plot_histogram(data = data.demand,nrow=3,ncol = 3,
               geom_histogram_args = list(fill="steelblue"))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Model Regresi Linear

regresi <- lm(formula = Demand~., data = data.demand)
summary(regresi)

## 
## Call:
## lm(formula = Demand ~ ., data = data.demand)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -79.080 -25.163  -3.329  29.869  69.172 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      845.017716 151.749505   5.569 8.63e-06 ***
## Harga             -5.227902   0.502860 -10.396 1.45e-10 ***
## Harga_Kompetitor   5.157661   1.107094   4.659 9.02e-05 ***
## Biaya_Iklan        0.295970   0.114984   2.574   0.0164 *  
## Income             0.010546   0.001499   7.037 2.24e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.5 on 25 degrees of freedom
## Multiple R-squared:  0.8894, Adjusted R-squared:  0.8717 
## F-statistic: 50.26 on 4 and 25 DF,  p-value: 1.351e-11

plot_model(regresi,type = "est",sort.est = TRUE,
           transform = "exp" )

plot_model(model = regresi,type="pred")

## $Harga

## 
## $Harga_Kompetitor

## 
## $Biaya_Iklan

## 
## $Income

Model Checking

plot_model(regresi,type = "diag")

## [[1]]

## 
## [[2]]

## `geom_smooth()` using formula 'y ~ x'

## 
## [[3]]

## 
## [[4]]

## `geom_smooth()` using formula 'y ~ x'

res <- residuals(regresi)

# uji normalitas
shapiro.test(res)

## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.98039, p-value = 0.8359

fBasics::jarqueberaTest(res)

## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 0.4725
##   P VALUE:
##     Asymptotic p Value: 0.7896 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra

fBasics::ksnormTest(res,)

## 
## Title:
##  One-sample Kolmogorov-Smirnov test
## 
## Test Results:
##   STATISTIC:
##     D: 0.5
##   P VALUE:
##     Alternative Two-Sided: 1.847e-07 
##     Alternative      Less: 8.951e-07 
##     Alternative   Greater: 9.234e-08 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra

print(fBasics::adTest(res))

## 
## Title:
##  Anderson - Darling Normality Test
## 
## Test Results:
##   STATISTIC:
##     A: 0.2566
##   P VALUE:
##     0.6991 
## 
## Description:
##  Mon Oct 23 17:55:44 2023 by user: hendra

Prediksi Regresi Linear

Membagi data menjadi training testing

set.seed(123)
data_split <- initial_split(data = data.demand,prop = 0.8)
train1 <- training(data_split)
test1 <- testing(data_split)
regresi2 <- lm(Demand ~.,data = train1)

Prediksi data testing

prediksi <- predict(regresi2,newdata = test1)
head(prediksi)

##        1        2        3        4        5        6 
## 1193.161 1203.997 1335.197 1341.295 1002.570 1318.758

Evaluasi hasil prediksi

# RMSE
mlr3measures::rmse(response = prediksi,truth = test1$Demand)

## [1] 60.69091

# MAPE
mlr3measures::mape(response = prediksi,truth = test1$Demand)

## [1] 0.04504478

# Spearman Correlation
mlr3measures::srho(response = prediksi,truth = test1$Demand)

## [1] 0.9428571