library(rsample)
library(DataExplorer)
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(openxlsx)
library(readxl)
library(car)
## Loading required package: carData
library(ggcorrplot)
## Loading required package: ggplot2
Data
data.demand <- read_excel(path = "Data/data_01.xlsx", col_names = TRUE, sheet="sheet1")
head(data.demand)
## # A tibble: 6 x 5
## Demand Harga Harga_Kompetitor Biaya_Iklan Income
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1290 137 94 814 42498
## 2 1117 147 81 896 41399
## 3 1155 149 89 852 39905
## 4 1299 117 92 854 34871
## 5 1166 135 86 810 34239
## 6 1186 143 79 768 44452
dim(data.demand)
## [1] 30 5
str(data.demand)
## tibble [30 x 5] (S3: tbl_df/tbl/data.frame)
## $ Demand : num [1:30] 1290 1117 1155 1299 1166 ...
## $ Harga : num [1:30] 137 147 149 117 135 143 113 111 109 129 ...
## $ Harga_Kompetitor: num [1:30] 94 81 89 92 86 79 91 82 81 82 ...
## $ Biaya_Iklan : num [1:30] 814 896 852 854 810 768 978 821 843 849 ...
## $ Income : num [1:30] 42498 41399 39905 34871 34239 ...
skimr::skim(data.demand)
| Name | data.demand |
| Number of rows | 30 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Demand | 0 | 1 | 1283.80 | 107.49 | 1089 | 1206.00 | 1293.0 | 1330.25 | 1515 | ▃▃▇▁▂ |
| Harga | 0 | 1 | 127.00 | 14.32 | 103 | 116.25 | 125.5 | 138.00 | 149 | ▆▇▆▇▇ |
| Harga_Kompetitor | 0 | 1 | 87.33 | 7.23 | 76 | 82.00 | 87.5 | 91.75 | 100 | ▅▇▇▃▆ |
| Biaya_Iklan | 0 | 1 | 870.17 | 69.51 | 768 | 818.00 | 853.0 | 913.75 | 1000 | ▆▇▃▃▃ |
| Income | 0 | 1 | 37430.03 | 4898.18 | 30247 | 33442.50 | 37543.5 | 41349.50 | 44671 | ▇▆▆▇▇ |
Data Exploration
scatterplot(Demand ~ Harga, data=data.demand, xlab="label x", ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)
scatterplot(Demand ~ Harga_Kompetitor, data=data.demand, xlab="label x", ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)
scatterplot(Demand ~ Biaya_Iklan, data=data.demand, xlab="label x", ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)
scatterplot(Demand ~ Income, data=data.demand, xlab="label x", ylab="label y", smoother=FALSE, reg.line=FALSE, grid=FALSE)
car::scatterplotMatrix(~Demand+Harga+Harga_Kompetitor + Biaya_Iklan+ Income, data=data.demand, reg.line=FALSE)
Korelasi
korelasi <- cor(data.demand)
ggcorrplot(korelasi, type="lower", lab = TRUE)
model <- lm(Demand ~. , data.demand)
vif(model)
## Harga Harga_Kompetitor Biaya_Iklan Income
## 1.014255 1.252316 1.249578 1.054299
Memeriksa Sebaran Data
plot_histogram(data = data.demand,nrow=3,ncol = 3,
geom_histogram_args = list(fill="steelblue"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Model Regresi Linear
regresi <- lm(formula = Demand~., data = data.demand)
summary(regresi)
##
## Call:
## lm(formula = Demand ~ ., data = data.demand)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.080 -25.163 -3.329 29.869 69.172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 845.017716 151.749505 5.569 8.63e-06 ***
## Harga -5.227902 0.502860 -10.396 1.45e-10 ***
## Harga_Kompetitor 5.157661 1.107094 4.659 9.02e-05 ***
## Biaya_Iklan 0.295970 0.114984 2.574 0.0164 *
## Income 0.010546 0.001499 7.037 2.24e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.5 on 25 degrees of freedom
## Multiple R-squared: 0.8894, Adjusted R-squared: 0.8717
## F-statistic: 50.26 on 4 and 25 DF, p-value: 1.351e-11
plot_model(regresi,type = "est",sort.est = TRUE,
transform = "exp" )
plot_model(model = regresi,type="pred")
## $Harga
##
## $Harga_Kompetitor
##
## $Biaya_Iklan
##
## $Income
Model Checking
plot_model(regresi,type = "diag")
## [[1]]
##
## [[2]]
## `geom_smooth()` using formula 'y ~ x'
##
## [[3]]
##
## [[4]]
## `geom_smooth()` using formula 'y ~ x'
res <- residuals(regresi)
# uji normalitas
shapiro.test(res)
##
## Shapiro-Wilk normality test
##
## data: res
## W = 0.98039, p-value = 0.8359
fBasics::jarqueberaTest(res)
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 0.4725
## P VALUE:
## Asymptotic p Value: 0.7896
##
## Description:
## Mon Oct 23 17:55:44 2023 by user: hendra
fBasics::ksnormTest(res,)
##
## Title:
## One-sample Kolmogorov-Smirnov test
##
## Test Results:
## STATISTIC:
## D: 0.5
## P VALUE:
## Alternative Two-Sided: 1.847e-07
## Alternative Less: 8.951e-07
## Alternative Greater: 9.234e-08
##
## Description:
## Mon Oct 23 17:55:44 2023 by user: hendra
print(fBasics::adTest(res))
##
## Title:
## Anderson - Darling Normality Test
##
## Test Results:
## STATISTIC:
## A: 0.2566
## P VALUE:
## 0.6991
##
## Description:
## Mon Oct 23 17:55:44 2023 by user: hendra
Prediksi Regresi Linear
Membagi data menjadi training testing
set.seed(123)
data_split <- initial_split(data = data.demand,prop = 0.8)
train1 <- training(data_split)
test1 <- testing(data_split)
regresi2 <- lm(Demand ~.,data = train1)
Prediksi data testing
prediksi <- predict(regresi2,newdata = test1)
head(prediksi)
## 1 2 3 4 5 6
## 1193.161 1203.997 1335.197 1341.295 1002.570 1318.758
Evaluasi hasil prediksi
# RMSE
mlr3measures::rmse(response = prediksi,truth = test1$Demand)
## [1] 60.69091
# MAPE
mlr3measures::mape(response = prediksi,truth = test1$Demand)
## [1] 0.04504478
# Spearman Correlation
mlr3measures::srho(response = prediksi,truth = test1$Demand)
## [1] 0.9428571