1. Introduction

Mobil BMW dikenal dengan segmentasi produk yang identik dengan kemewahan serta fasilitas dan kecanggihannya. Selain itu, mobil BMW memiliki model, tahun pembuatan, harga, transmisi, jarak tempuh, jenis bahan bakar, pajak, mpg, dan ukuran mesin yang berbeda-beda. Salah satu cara yang dapat digunakan dalam memilih mobil BMW berdasarkan hal-hal tersebut adalah penerapan regresi linear, karena terdapat beberapa data yang dapat digunakan dalam perhitungan guna mendapatkan perkiraan harga mobil BMW. Metode regresi linear digunakan untuk menentukan harga jual mobil BMW. Tujuan dari penelitian ini adalah menerapkan metode regresi linear dalam menentukan harga jual mobil BMW berdasarkan model, tahun pembuatan, harga, transmisi, jarak tempuh, jenis bahan bakar, pajak, mpg, dan ukuran mesin. Sistem yang dibangun ini diharapkan dapat membantu memprediksi harga jual mobil BMW dengan baik dan bermanfaat.

  1. Library and Setup

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.1.2
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(caret)
## Warning: package 'caret' was built under R version 4.1.2
## Loading required package: lattice
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(ggplot2)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.1.2
## 
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## The following object is masked from 'package:base':
## 
##     Recall
library(lime)
## Warning: package 'lime' was built under R version 4.1.2
## 
## Attaching package: 'lime'
## The following object is masked from 'package:dplyr':
## 
##     explain
library(rsample)
## Warning: package 'rsample' was built under R version 4.1.2
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

  1. Read Data

# Load the BMW listings from the CSV file in the working directory,
# then preview the first and last rows.
mobil_bmw <- read.csv(file = "Mobil_bmw.csv")
head(mobil_bmw)
tail(mobil_bmw)

  1. Preprocessing Data

  1. Cek Tipe Data

# Inspect the column types as read from the CSV.
str(mobil_bmw)
## 'data.frame':    10781 obs. of  9 variables:
##  $ model       : chr  " 5 Series" " 6 Series" " 5 Series" " 1 Series" ...
##  $ year        : int  2014 2018 2016 2017 2014 2016 2017 2018 2017 2016 ...
##  $ price       : int  11200 27000 16000 12750 14500 14900 16000 16250 14250 14250 ...
##  $ transmission: chr  "Automatic" "Automatic" "Automatic" "Automatic" ...
##  $ mileage     : int  67068 14827 62794 26676 39554 35309 38538 10401 42668 36099 ...
##  $ fuelType    : chr  "Diesel" "Petrol" "Diesel" "Diesel" ...
##  $ tax         : int  125 145 160 145 160 125 125 145 30 20 ...
##  $ mpg         : num  57.6 42.8 51.4 72.4 50.4 60.1 60.1 52.3 62.8 68.9 ...
##  $ engineSize  : num  2 2 3 1.5 3 2 2 1.5 2 2 ...

  1. Cek Null Data

# Count missing values per column.
colSums(is.na(mobil_bmw))
##        model         year        price transmission      mileage     fuelType 
##            0            0            0            0            0            0 
##          tax          mpg   engineSize 
##            0            0            0
# TRUE if the data frame contains at least one missing value.
anyNA(mobil_bmw)
## [1] FALSE

  1. Convert tipe data

# Convert the categorical columns (year included) to factors,
# then re-inspect the structure to confirm the new types.
factor_cols <- c("model", "year", "transmission", "fuelType")
mobil_bmw[factor_cols] <- lapply(mobil_bmw[factor_cols], as.factor)
str(mobil_bmw)
## 'data.frame':    10781 obs. of  9 variables:
##  $ model       : Factor w/ 24 levels " 1 Series"," 2 Series",..: 5 6 5 1 7 5 5 2 4 5 ...
##  $ year        : Factor w/ 25 levels "1996","1997",..: 19 23 21 22 19 21 22 23 22 21 ...
##  $ price       : int  11200 27000 16000 12750 14500 14900 16000 16250 14250 14250 ...
##  $ transmission: Factor w/ 3 levels "Automatic","Manual",..: 1 1 1 1 1 1 1 2 2 1 ...
##  $ mileage     : int  67068 14827 62794 26676 39554 35309 38538 10401 42668 36099 ...
##  $ fuelType    : Factor w/ 5 levels "Diesel","Electric",..: 1 5 1 1 1 1 1 5 1 1 ...
##  $ tax         : int  125 145 160 145 160 125 125 145 30 20 ...
##  $ mpg         : num  57.6 42.8 51.4 72.4 50.4 60.1 60.1 52.3 62.8 68.9 ...
##  $ engineSize  : num  2 2 3 1.5 3 2 2 1.5 2 2 ...

  1. Ringkasan Data

# Per-column summary: level counts for factors, quantiles for numerics.
summary(mobil_bmw)
##        model           year          price           transmission 
##   3 Series:2443   2019   :3485   Min.   :  1200   Automatic:3588  
##   1 Series:1969   2016   :1882   1st Qu.: 14950   Manual   :2527  
##   2 Series:1229   2017   :1721   Median : 20462   Semi-Auto:4666  
##   5 Series:1056   2015   : 922   Mean   : 22733                   
##   4 Series: 995   2018   : 848   3rd Qu.: 27940                   
##   X1      : 804   2020   : 733   Max.   :123456                   
##  (Other)  :2285   (Other):1190                                    
##     mileage           fuelType         tax             mpg       
##  Min.   :     1   Diesel  :7027   Min.   :  0.0   Min.   :  5.5  
##  1st Qu.:  5529   Electric:   3   1st Qu.:135.0   1st Qu.: 45.6  
##  Median : 18347   Hybrid  : 298   Median :145.0   Median : 53.3  
##  Mean   : 25497   Other   :  36   Mean   :131.7   Mean   : 56.4  
##  3rd Qu.: 38206   Petrol  :3417   3rd Qu.:145.0   3rd Qu.: 62.8  
##  Max.   :214000                   Max.   :580.0   Max.   :470.8  
##                                                                  
##    engineSize   
##  Min.   :0.000  
##  1st Qu.:2.000  
##  Median :2.000  
##  Mean   :2.168  
##  3rd Qu.:2.000  
##  Max.   :6.600  
## 

Data selection (filter numeric only)

# Keep only the numeric columns (they feed the correlation plot).
# `indek` holds the positions of the numeric columns in `mobil_bmw`;
# `dataku` is the numeric-only data frame.
# vapply() replaces the index loop that grew `indek` one element at a time.
indek <- unname(which(vapply(mobil_bmw, is.numeric, logical(1))))
# drop = FALSE keeps a data frame even when exactly one column is numeric.
dataku <- mobil_bmw[, indek, drop = FALSE]

  1. Tabel Korelasi

# Correlation heat map of the numeric columns, with the coefficient printed
# in each cell. TRUE is spelled out because `T` is an ordinary, reassignable
# variable.
library(GGally)
ggcorr(dataku, label = TRUE, label_size = 3, hjust = 0.9)

  1. Visualisasi Data

  1. Histogram

# Draw a density-scaled histogram for every numeric column.
# `num_data` stays a top-level variable because the Q-Q plot section reuses it.
num_data <- mobil_bmw
for (col_name in names(num_data)) {
  if (is.numeric(num_data[[col_name]])) {
    hist(
      num_data[[col_name]],
      # A length-2 character vector passed to `main` is drawn as a two-line
      # title; paste0() builds the intended single-line title.
      main = paste0("hist of ", col_name),
      xlab = col_name,
      breaks = 20,
      # Full argument name instead of relying on partial matching of `prob`.
      probability = TRUE
    )
  }
}

  1. QQnorm

# Normal Q-Q plot with a reference line for every numeric column.
# Uses `num_data`, the copy of the data created in the histogram section.
for (col_name in names(num_data)) {
  if (is.numeric(num_data[[col_name]])) {
    # paste0() gives a single-line title; c() would split it over two lines.
    qqnorm(num_data[[col_name]], main = paste0("Q-Q Plot ", col_name))
    qqline(num_data[[col_name]])
  }
}

  1. Boxplot

# Boxplot of every numeric column, titled with the column name.
# seq_along() avoids the 1:0 iteration that 1:length() yields on empty input.
for (i in seq_along(mobil_bmw)) {
  if (is.numeric(mobil_bmw[[i]])) {
    boxplot(mobil_bmw[[i]], main = names(mobil_bmw)[i])
  }
}

  1. Remove Outlier

# Drop listings priced at 40000 or more, then re-draw the price boxplot
# on the remaining rows.
price_cap <- 40000
is_below_cap <- mobil_bmw$price < price_cap
mobil_bmw <- mobil_bmw[is_below_cap, ]
boxplot(mobil_bmw$price, main = "boxplot price")

  1. Validasi Data

# Reproducible split: 85% of rows for training, the rest for testing,
# stratified on price.
set.seed(100)
idx <- rsample::initial_split(data = mobil_bmw, prop = 0.85, strata = "price")
concrete.train <- rsample::training(idx)
concrete.test <- rsample::testing(idx)

  1. Building Model Regression

# Fit a linear regression of price on all other columns of the training set
# and print the coefficient table.
mod.linear <- lm(price ~ ., data = concrete.train)
summary(mod.linear)
## 
## Call:
## lm(formula = price ~ ., data = concrete.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16880.8  -1589.4    -97.9   1409.7  23705.7 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -3.203e+02  2.649e+03  -0.121 0.903772    
## model 2 Series        -2.237e+02  1.062e+02  -2.106 0.035266 *  
## model 3 Series         2.018e+03  9.446e+01  21.364  < 2e-16 ***
## model 4 Series         2.396e+03  1.182e+02  20.278  < 2e-16 ***
## model 5 Series         3.732e+03  1.210e+02  30.833  < 2e-16 ***
## model 6 Series         4.416e+03  3.012e+02  14.658  < 2e-16 ***
## model 7 Series         7.020e+03  3.853e+02  18.218  < 2e-16 ***
## model i3               3.081e+04  1.355e+03  22.731  < 2e-16 ***
## model i8               2.740e+04  2.668e+03  10.271  < 2e-16 ***
## model M2               8.858e+03  1.519e+03   5.830 5.76e-09 ***
## model M3               1.280e+04  6.669e+02  19.196  < 2e-16 ***
## model M4               1.332e+04  4.227e+02  31.512  < 2e-16 ***
## model M5               9.666e+03  1.113e+03   8.685  < 2e-16 ***
## model M6               9.462e+03  1.355e+03   6.983 3.10e-12 ***
## model X1               2.477e+03  1.243e+02  19.934  < 2e-16 ***
## model X2               3.253e+03  1.902e+02  17.101  < 2e-16 ***
## model X3               6.783e+03  1.539e+02  44.070  < 2e-16 ***
## model X4               8.163e+03  2.602e+02  31.377  < 2e-16 ***
## model X5               1.074e+04  2.274e+02  47.205  < 2e-16 ***
## model X6               1.095e+04  4.187e+02  26.162  < 2e-16 ***
## model Z3               3.438e+03  2.205e+03   1.559 0.119051    
## model Z4               4.300e+03  2.992e+02  14.371  < 2e-16 ***
## year1997               4.047e+03  4.316e+03   0.938 0.348361    
## year1998               4.677e+03  4.316e+03   1.084 0.278568    
## year1999               5.440e+03  3.898e+03   1.396 0.162897    
## year2000               4.119e+03  3.215e+03   1.281 0.200067    
## year2001               1.000e+04  3.119e+03   3.208 0.001342 ** 
## year2002               6.123e+03  2.985e+03   2.051 0.040303 *  
## year2003               4.159e+03  3.216e+03   1.293 0.195981    
## year2004               4.813e+03  2.773e+03   1.736 0.082628 .  
## year2005               3.807e+03  2.942e+03   1.294 0.195688    
## year2006               6.104e+03  2.737e+03   2.230 0.025767 *  
## year2007               7.444e+03  2.735e+03   2.722 0.006506 ** 
## year2008               7.730e+03  2.688e+03   2.876 0.004043 ** 
## year2009               8.617e+03  2.675e+03   3.221 0.001282 ** 
## year2010               8.970e+03  2.662e+03   3.370 0.000756 ***
## year2011               8.717e+03  2.656e+03   3.282 0.001034 ** 
## year2012               1.058e+04  2.639e+03   4.009 6.14e-05 ***
## year2013               1.159e+04  2.630e+03   4.407 1.06e-05 ***
## year2014               1.256e+04  2.629e+03   4.777 1.81e-06 ***
## year2015               1.389e+04  2.628e+03   5.288 1.27e-07 ***
## year2016               1.523e+04  2.627e+03   5.796 7.02e-09 ***
## year2017               1.688e+04  2.627e+03   6.425 1.39e-10 ***
## year2018               1.872e+04  2.628e+03   7.125 1.12e-12 ***
## year2019               2.332e+04  2.627e+03   8.878  < 2e-16 ***
## year2020               2.570e+04  2.629e+03   9.776  < 2e-16 ***
## transmissionManual    -1.136e+03  8.601e+01 -13.211  < 2e-16 ***
## transmissionSemi-Auto  4.131e+02  7.145e+01   5.781 7.67e-09 ***
## mileage               -8.394e-02  1.955e-03 -42.934  < 2e-16 ***
## fuelTypeElectric       4.132e+03  1.601e+03   2.582 0.009845 ** 
## fuelTypeHybrid         5.782e+03  2.901e+02  19.926  < 2e-16 ***
## fuelTypeOther          4.894e+03  5.413e+02   9.041  < 2e-16 ***
## fuelTypePetrol        -4.919e+02  9.076e+01  -5.420 6.12e-08 ***
## tax                   -5.510e+00  6.906e-01  -7.979 1.67e-15 ***
## mpg                   -6.516e+01  3.799e+00 -17.150  < 2e-16 ***
## engineSize             3.323e+03  8.374e+01  39.688  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2622 on 8460 degrees of freedom
## Multiple R-squared:  0.8867, Adjusted R-squared:  0.8859 
## F-statistic:  1203 on 55 and 8460 DF,  p-value: < 2.2e-16

Mencari model optimal secara otomatis dengan metode stepwise

# Stepwise model selection starting from the full model; terms may be
# dropped or re-added (direction = "both").
mod.linear.step <- stats::step(mod.linear, direction = "both")
## Start:  AIC=134127.6
## price ~ model + year + transmission + mileage + fuelType + tax + 
##     mpg + engineSize
## 
##                Df  Sum of Sq        RSS    AIC
## <none>                       5.8169e+10 134128
## - tax           1 4.3773e+08 5.8607e+10 134189
## - mpg           1 2.0224e+09 6.0191e+10 134417
## - transmission  2 2.2902e+09 6.0459e+10 134452
## - fuelType      4 2.8021e+09 6.0971e+10 134520
## - engineSize    1 1.0830e+10 6.9000e+10 135580
## - mileage       1 1.2674e+10 7.0843e+10 135804
## - model        21 4.8920e+10 1.0709e+11 139283
## - year         24 6.1412e+10 1.1958e+11 140217
# Coefficient table and fit statistics of the stepwise-selected model.
summary(mod.linear.step)
## 
## Call:
## lm(formula = price ~ model + year + transmission + mileage + 
##     fuelType + tax + mpg + engineSize, data = concrete.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16880.8  -1589.4    -97.9   1409.7  23705.7 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -3.203e+02  2.649e+03  -0.121 0.903772    
## model 2 Series        -2.237e+02  1.062e+02  -2.106 0.035266 *  
## model 3 Series         2.018e+03  9.446e+01  21.364  < 2e-16 ***
## model 4 Series         2.396e+03  1.182e+02  20.278  < 2e-16 ***
## model 5 Series         3.732e+03  1.210e+02  30.833  < 2e-16 ***
## model 6 Series         4.416e+03  3.012e+02  14.658  < 2e-16 ***
## model 7 Series         7.020e+03  3.853e+02  18.218  < 2e-16 ***
## model i3               3.081e+04  1.355e+03  22.731  < 2e-16 ***
## model i8               2.740e+04  2.668e+03  10.271  < 2e-16 ***
## model M2               8.858e+03  1.519e+03   5.830 5.76e-09 ***
## model M3               1.280e+04  6.669e+02  19.196  < 2e-16 ***
## model M4               1.332e+04  4.227e+02  31.512  < 2e-16 ***
## model M5               9.666e+03  1.113e+03   8.685  < 2e-16 ***
## model M6               9.462e+03  1.355e+03   6.983 3.10e-12 ***
## model X1               2.477e+03  1.243e+02  19.934  < 2e-16 ***
## model X2               3.253e+03  1.902e+02  17.101  < 2e-16 ***
## model X3               6.783e+03  1.539e+02  44.070  < 2e-16 ***
## model X4               8.163e+03  2.602e+02  31.377  < 2e-16 ***
## model X5               1.074e+04  2.274e+02  47.205  < 2e-16 ***
## model X6               1.095e+04  4.187e+02  26.162  < 2e-16 ***
## model Z3               3.438e+03  2.205e+03   1.559 0.119051    
## model Z4               4.300e+03  2.992e+02  14.371  < 2e-16 ***
## year1997               4.047e+03  4.316e+03   0.938 0.348361    
## year1998               4.677e+03  4.316e+03   1.084 0.278568    
## year1999               5.440e+03  3.898e+03   1.396 0.162897    
## year2000               4.119e+03  3.215e+03   1.281 0.200067    
## year2001               1.000e+04  3.119e+03   3.208 0.001342 ** 
## year2002               6.123e+03  2.985e+03   2.051 0.040303 *  
## year2003               4.159e+03  3.216e+03   1.293 0.195981    
## year2004               4.813e+03  2.773e+03   1.736 0.082628 .  
## year2005               3.807e+03  2.942e+03   1.294 0.195688    
## year2006               6.104e+03  2.737e+03   2.230 0.025767 *  
## year2007               7.444e+03  2.735e+03   2.722 0.006506 ** 
## year2008               7.730e+03  2.688e+03   2.876 0.004043 ** 
## year2009               8.617e+03  2.675e+03   3.221 0.001282 ** 
## year2010               8.970e+03  2.662e+03   3.370 0.000756 ***
## year2011               8.717e+03  2.656e+03   3.282 0.001034 ** 
## year2012               1.058e+04  2.639e+03   4.009 6.14e-05 ***
## year2013               1.159e+04  2.630e+03   4.407 1.06e-05 ***
## year2014               1.256e+04  2.629e+03   4.777 1.81e-06 ***
## year2015               1.389e+04  2.628e+03   5.288 1.27e-07 ***
## year2016               1.523e+04  2.627e+03   5.796 7.02e-09 ***
## year2017               1.688e+04  2.627e+03   6.425 1.39e-10 ***
## year2018               1.872e+04  2.628e+03   7.125 1.12e-12 ***
## year2019               2.332e+04  2.627e+03   8.878  < 2e-16 ***
## year2020               2.570e+04  2.629e+03   9.776  < 2e-16 ***
## transmissionManual    -1.136e+03  8.601e+01 -13.211  < 2e-16 ***
## transmissionSemi-Auto  4.131e+02  7.145e+01   5.781 7.67e-09 ***
## mileage               -8.394e-02  1.955e-03 -42.934  < 2e-16 ***
## fuelTypeElectric       4.132e+03  1.601e+03   2.582 0.009845 ** 
## fuelTypeHybrid         5.782e+03  2.901e+02  19.926  < 2e-16 ***
## fuelTypeOther          4.894e+03  5.413e+02   9.041  < 2e-16 ***
## fuelTypePetrol        -4.919e+02  9.076e+01  -5.420 6.12e-08 ***
## tax                   -5.510e+00  6.906e-01  -7.979 1.67e-15 ***
## mpg                   -6.516e+01  3.799e+00 -17.150  < 2e-16 ***
## engineSize             3.323e+03  8.374e+01  39.688  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2622 on 8460 degrees of freedom
## Multiple R-squared:  0.8867, Adjusted R-squared:  0.8859 
## F-statistic:  1203 on 55 and 8460 DF,  p-value: < 2.2e-16

  1. Evaluasi Model

# Predict on the hold-out set. With interval = "confidence", predict()
# returns a matrix with columns fit, lwr and upr rather than a plain vector.
prediction_test <- predict(
  object = mod.linear.step,
  newdata = concrete.test,
  interval = "confidence",
  level = 0.95
)
# The MAE must be computed from the point predictions only. Subtracting the
# price vector from the whole matrix also averaged the errors of the lwr/upr
# bounds. With this fix the value matches the MAE computed from the plain
# point predictions later in this section; any output recorded before the
# fix must be regenerated.
MAE <- mean(abs(prediction_test[, "fit"] - concrete.test$price))
MAE
## [1] 1997.778
# Predict on the training set. With interval = "confidence", predict()
# returns a matrix with columns fit, lwr and upr rather than a plain vector.
prediction_train <- predict(
  object = mod.linear.step,
  newdata = concrete.train,
  interval = "confidence",
  level = 0.95
)
# Use only the fitted values for the training MAE. Subtracting from the whole
# matrix also averaged the errors of the lwr/upr bounds, so any output
# recorded before this fix must be regenerated.
MAE <- mean(abs(prediction_train[, "fit"] - concrete.train$price))
MAE
## [1] 1965.681
library(dplyr)
# Point predictions for the test rows; the response column is removed from
# the predictor frame before predicting.
test_predictors <- dplyr::select(concrete.test, -price)
predict.admission <- predict(object = mod.linear.step, newdata = test_predictors)
head(predict.admission)
##        21        23        29        33        39        42 
##  9203.337 16771.540 19896.506 19808.310 18152.415 12290.286
# Preview the test rows, then compute the test-set RMSE.
head(concrete.test)
library(MLmetrics)
# Namespace-qualified so the MLmetrics version (y_pred / y_true arguments)
# is used regardless of which package masks RMSE.
MLmetrics::RMSE(y_pred = predict.admission, y_true = concrete.test$price)
## [1] 2592.122
# Test-set mean absolute error from the point predictions.
abs_errors <- abs(predict.admission - concrete.test$price)
MAE <- mean(abs_errors)
MAE
## [1] 1979.214
# Normality test (Lilliefors / Kolmogorov-Smirnov) on the residuals.
# It is run on the stepwise-selected model so that it checks the same model
# as the bptest() and vif() diagnostics; residuals() is the accessor for the
# fitted model's residuals.
library(nortest)
lillie.test(residuals(mod.linear.step))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  mod.linear$residuals
## D = 0.046338, p-value < 2.2e-16
# Studentized Breusch-Pagan test on the selected model.
lmtest::bptest(formula = mod.linear.step)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod.linear.step
## BP = 1032.5, df = 55, p-value < 2.2e-16
# (Generalized) variance inflation factors of the selected model's terms.
car::vif(mod.linear.step)
##                    GVIF Df GVIF^(1/(2*Df))
## model        146.165715 21        1.126014
## year          22.306434 24        1.066823
## transmission   1.651878  2        1.133691
## mileage        3.008201  1        1.734417
## fuelType       6.052199  4        1.252389
## tax            2.299355  1        1.516362
## mpg           19.333570  1        4.396996
## engineSize     2.210234  1        1.486686