Analisis Regresi Linier Berganda adalah model regresi linier yang melibatkan lebih dari satu variabel bebas (prediktor).

library(MASS)
## Warning: package 'MASS' was built under R version 3.5.3
library(car)
## Loading required package: carData

Membaca data dari file format csv

# Load the car price data set from a CSV file given by a relative path
# (resolved against the current working directory).
carmodel <- read.csv("Carprice_Dataset.csv")

Melihat 6 baris data dari atas

# Print the first rows of the data frame (head() shows six rows by default).
head(carmodel)
##   car_ID symboling                  CarName fueltype aspiration doornumber
## 1      1         3       alfa-romero giulia      gas        std        two
## 2      2         3      alfa-romero stelvio      gas        std        two
## 3      3         1 alfa-romero Quadrifoglio      gas        std        two
## 4      4         2              audi 100 ls      gas        std       four
## 5      5         2               audi 100ls      gas        std       four
## 6      6         2                 audi fox      gas        std        two
##       carbody drivewheel enginelocation wheelbase carlength carwidth
## 1 convertible        rwd          front      88.6     168.8     64.1
## 2 convertible        rwd          front      88.6     168.8     64.1
## 3   hatchback        rwd          front      94.5     171.2     65.5
## 4       sedan        fwd          front      99.8     176.6     66.2
## 5       sedan        4wd          front      99.4     176.6     66.4
## 6       sedan        fwd          front      99.8     177.3     66.3
##   carheight curbweight enginetype cylindernumber enginesize fuelsystem
## 1      48.8       2548       dohc           four        130       mpfi
## 2      48.8       2548       dohc           four        130       mpfi
## 3      52.4       2823       ohcv            six        152       mpfi
## 4      54.3       2337        ohc           four        109       mpfi
## 5      54.3       2824        ohc           five        136       mpfi
## 6      53.1       2507        ohc           five        136       mpfi
##   boreratio stroke compressionratio horsepower peakrpm citympg highwaympg
## 1      3.47   2.68              9.0        111    5000      21         27
## 2      3.47   2.68              9.0        111    5000      21         27
## 3      2.68   3.47              9.0        154    5000      19         26
## 4      3.19   3.40             10.0        102    5500      24         30
## 5      3.19   3.40              8.0        115    5500      18         22
## 6      3.19   3.40              8.5        110    5500      19         25
##   price
## 1 13495
## 2 16500
## 3 16500
## 4 13950
## 5 17450
## 6 15250

Struktur Data

# Show the dimensions, column names, column types and leading values.
str(carmodel)
## 'data.frame':    205 obs. of  26 variables:
##  $ car_ID          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ symboling       : int  3 3 1 2 2 2 1 1 1 0 ...
##  $ CarName         : Factor w/ 147 levels "alfa-romero giulia",..: 1 3 2 4 5 9 5 7 6 8 ...
##  $ fueltype        : Factor w/ 2 levels "diesel","gas": 2 2 2 2 2 2 2 2 2 2 ...
##  $ aspiration      : Factor w/ 2 levels "std","turbo": 1 1 1 1 1 1 1 1 2 2 ...
##  $ doornumber      : Factor w/ 2 levels "four","two": 2 2 2 1 1 2 1 1 1 2 ...
##  $ carbody         : Factor w/ 5 levels "convertible",..: 1 1 3 4 4 4 4 5 4 3 ...
##  $ drivewheel      : Factor w/ 3 levels "4wd","fwd","rwd": 3 3 3 2 1 2 2 2 2 1 ...
##  $ enginelocation  : Factor w/ 2 levels "front","rear": 1 1 1 1 1 1 1 1 1 1 ...
##  $ wheelbase       : num  88.6 88.6 94.5 99.8 99.4 ...
##  $ carlength       : num  169 169 171 177 177 ...
##  $ carwidth        : num  64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
##  $ carheight       : num  48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
##  $ curbweight      : int  2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
##  $ enginetype      : Factor w/ 7 levels "dohc","dohcv",..: 1 1 6 4 4 4 4 4 4 4 ...
##  $ cylindernumber  : Factor w/ 7 levels "eight","five",..: 3 3 4 3 2 2 2 2 2 2 ...
##  $ enginesize      : int  130 130 152 109 136 136 136 136 131 131 ...
##  $ fuelsystem      : Factor w/ 8 levels "1bbl","2bbl",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ boreratio       : num  3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
##  $ stroke          : num  2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
##  $ compressionratio: num  9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
##  $ horsepower      : int  111 111 154 102 115 110 110 110 140 160 ...
##  $ peakrpm         : int  5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
##  $ citympg         : int  21 21 19 24 18 19 19 19 17 16 ...
##  $ highwaympg      : int  27 27 26 30 22 25 25 25 20 22 ...
##  $ price           : num  13495 16500 16500 13950 17450 ...

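Mengambil nama perusahaan dari variabel CarName dengan menghapus semua karakter mulai dari spasi pertama (pola regex `\\ .*`) dan menyimpannya pada variabel baru companyname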

# Derive the manufacturer name: remove everything from the first space to the
# end of CarName and store the remainder in a new companyname column.
carmodel$companyname <- gsub("\\ .*", "", carmodel$CarName)

Melihat nilai unik dari variabel companyname pada data carmodel

# List the distinct values of companyname.
unique(carmodel$companyname)
##  [1] "alfa-romero" "audi"        "bmw"         "chevrolet"   "dodge"      
##  [6] "honda"       "isuzu"       "jaguar"      "maxda"       "mazda"      
## [11] "buick"       "mercury"     "mitsubishi"  "Nissan"      "nissan"     
## [16] "peugeot"     "plymouth"    "porsche"     "porcshce"    "renault"    
## [21] "saab"        "subaru"      "toyota"      "toyouta"     "vokswagen"  
## [26] "volkswagen"  "vw"          "volvo"

Memperbaiki salah ketik dan perbedaan penulisan nama perusahaan pada variabel companyname

# Correct misspelled or inconsistently written manufacturer names.
#
# Each entry maps a wrong spelling (the element name) to the canonical
# spelling (the element value). Only values that equal a wrong spelling
# exactly are replaced, so a name that merely contains one of these strings
# (for example "vw" inside a longer word) is left untouched, and the result
# does not depend on the order of the entries.
company_name_fixes <- c(
  vokswagen = "volkswagen",
  maxda = "mazda",
  Nissan = "nissan",
  porcshce = "porsche",
  vw = "volkswagen",
  toyouta = "toyota"
)
needs_fix <- carmodel$companyname %in% names(company_name_fixes)
# unname() keeps the lookup keys from being attached to the replaced values.
carmodel$companyname[needs_fix] <- unname(
  company_name_fixes[carmodel$companyname[needs_fix]]
)

Melihat isi data dari variabel companyname

# List the distinct values of companyname to verify the corrected spellings.
unique(carmodel$companyname)
##  [1] "alfa-romero" "audi"        "bmw"         "chevrolet"   "dodge"      
##  [6] "honda"       "isuzu"       "jaguar"      "mazda"       "buick"      
## [11] "mercury"     "mitsubishi"  "nissan"      "peugeot"     "plymouth"   
## [16] "porsche"     "renault"     "saab"        "subaru"      "toyota"     
## [21] "volkswagen"  "volvo"

Menghitung jumlah nilai NA (data kosong) pada data carmodel

# Count the missing (NA) values over all cells of the data frame.
sum(is.na(carmodel))
## [1] 0

Menghitung jumlah baris yang duplikat

# Count the rows that are exact duplicates of an earlier row.
sum(duplicated(carmodel)) 
## [1] 0

Melihat nilai unik dari variabel symboling, fueltype, aspiration, doornumber, carbody, dan drivewheel

# Inspect the distinct values of six categorical columns, one call per
# column; the lines starting with "##" are the printed results.
unique(carmodel$symboling)
## [1]  3  1  2  0 -1 -2
unique(carmodel$fueltype)
## [1] gas    diesel
## Levels: diesel gas
unique(carmodel$aspiration)
## [1] std   turbo
## Levels: std turbo
unique(carmodel$doornumber)
## [1] two  four
## Levels: four two
unique(carmodel$carbody)
## [1] convertible hatchback   sedan       wagon       hardtop    
## Levels: convertible hardtop hatchback sedan wagon
unique(carmodel$drivewheel)
## [1] rwd fwd 4wd
## Levels: 4wd fwd rwd

Mengubah nilai variabel companyname menjadi huruf kapital

# Convert every manufacturer name to upper case.
carmodel$companyname <- toupper(carmodel$companyname)

Membuat data pada kolom menjadi bertipe factor

# Columns holding categorical information that must be stored as factors.
categorical_columns <- c(
  "symboling", "CarName", "carbody", "drivewheel", "enginelocation",
  "cylindernumber", "doornumber", "fuelsystem", "aspiration", "enginetype",
  "fueltype"
)
# Convert the listed columns in place; all other columns and the column
# order are left untouched.
carmodel[categorical_columns] <- lapply(
  carmodel[categorical_columns],
  as.factor
)

Struktur dari data carmodel

# Show the structure again to check the column types after the conversion.
str(carmodel)
## 'data.frame':    205 obs. of  27 variables:
##  $ car_ID          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ symboling       : Factor w/ 6 levels "-2","-1","0",..: 6 6 4 5 5 5 4 4 4 3 ...
##  $ CarName         : Factor w/ 147 levels "alfa-romero giulia",..: 1 3 2 4 5 9 5 7 6 8 ...
##  $ fueltype        : Factor w/ 2 levels "diesel","gas": 2 2 2 2 2 2 2 2 2 2 ...
##  $ aspiration      : Factor w/ 2 levels "std","turbo": 1 1 1 1 1 1 1 1 2 2 ...
##  $ doornumber      : Factor w/ 2 levels "four","two": 2 2 2 1 1 2 1 1 1 2 ...
##  $ carbody         : Factor w/ 5 levels "convertible",..: 1 1 3 4 4 4 4 5 4 3 ...
##  $ drivewheel      : Factor w/ 3 levels "4wd","fwd","rwd": 3 3 3 2 1 2 2 2 2 1 ...
##  $ enginelocation  : Factor w/ 2 levels "front","rear": 1 1 1 1 1 1 1 1 1 1 ...
##  $ wheelbase       : num  88.6 88.6 94.5 99.8 99.4 ...
##  $ carlength       : num  169 169 171 177 177 ...
##  $ carwidth        : num  64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
##  $ carheight       : num  48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
##  $ curbweight      : int  2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
##  $ enginetype      : Factor w/ 7 levels "dohc","dohcv",..: 1 1 6 4 4 4 4 4 4 4 ...
##  $ cylindernumber  : Factor w/ 7 levels "eight","five",..: 3 3 4 3 2 2 2 2 2 2 ...
##  $ enginesize      : int  130 130 152 109 136 136 136 136 131 131 ...
##  $ fuelsystem      : Factor w/ 8 levels "1bbl","2bbl",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ boreratio       : num  3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
##  $ stroke          : num  2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
##  $ compressionratio: num  9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
##  $ horsepower      : int  111 111 154 102 115 110 110 110 140 160 ...
##  $ peakrpm         : int  5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
##  $ citympg         : int  21 21 19 24 18 19 19 19 17 16 ...
##  $ highwaympg      : int  27 27 26 30 22 25 25 25 20 22 ...
##  $ price           : num  13495 16500 16500 13950 17450 ...
##  $ companyname     : chr  "ALFA-ROMERO" "ALFA-ROMERO" "ALFA-ROMERO" "AUDI" ...

Mengubah data factor menjadi data tipe numeric

# Encode the four two-category columns as numeric 0/1 indicators.
#
# Each column is compared against an explicitly named category instead of
# relabelling the factor levels by position, so the coding does not depend on
# the order in which the factor levels were created. The resulting values
# equal those of the positional c(1, 0) relabelling on alphabetically ordered
# levels:
#   enginelocation: front  = 1, rear  = 0
#   doornumber:     four   = 1, two   = 0
#   fueltype:       diesel = 1, gas   = 0
#   aspiration:     std    = 1, turbo = 0
indicator_levels <- c(
  enginelocation = "front",
  doornumber = "four",
  fueltype = "diesel",
  aspiration = "std"
)
for (column in names(indicator_levels)) {
  # as.character() makes the comparison work for factor and character input.
  values <- as.character(carmodel[[column]])
  # A 0/1 coding is only meaningful for at most two distinct categories.
  stopifnot(length(unique(values[!is.na(values)])) <= 2L)
  carmodel[[column]] <- as.numeric(values == indicator_levels[[column]])
}

Membuat data factor menjadi data biner

# Replace one categorical column by its dummy (0/1 indicator) columns.
#
# df:     data frame that contains `column`.
# column: name of the categorical column to expand (length-one character).
# Returns `df` without `column`, followed by the indicator columns built by
# model.matrix() for that column; the "(Intercept)" column is discarded.
add_dummy_columns <- function(df, column) {
  stopifnot(
    is.character(column),
    length(column) == 1L,
    column %in% colnames(df)
  )
  design <- model.matrix(reformulate(column), data = df)
  # drop = FALSE keeps a data frame (and therefore the column name) even when
  # only a single indicator column remains after removing the intercept.
  dummies <- as.data.frame(design)[, -1, drop = FALSE]
  # A logical mask is used because a negative which() index would select zero
  # columns if the name were ever missing.
  remaining <- df[, colnames(df) != column, drop = FALSE]
  cbind(remaining, dummies)
}

# Expand the multi-category columns one after another; the order of this
# vector determines the order of the appended indicator columns.
dummy_source_columns <- c(
  "drivewheel", "carbody", "enginetype", "cylindernumber", "fuelsystem",
  "companyname", "symboling"
)
for (column in dummy_source_columns) {
  carmodel <- add_dummy_columns(carmodel, column)
}

Menghilangkan variabel CarName

# Remove the CarName column. A logical mask is used instead of a negative
# which() index, which would select zero columns if the name were missing;
# drop = FALSE guarantees the result stays a data frame.
carmodel <- carmodel[, colnames(carmodel) != "CarName", drop = FALSE]
# Show the structure of the resulting data frame.
str(carmodel)
## 'data.frame':    205 obs. of  70 variables:
##  $ car_ID               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ fueltype             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aspiration           : num  1 1 1 1 1 1 1 1 0 0 ...
##  $ doornumber           : num  0 0 0 1 1 0 1 1 1 0 ...
##  $ enginelocation       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ wheelbase            : num  88.6 88.6 94.5 99.8 99.4 ...
##  $ carlength            : num  169 169 171 177 177 ...
##  $ carwidth             : num  64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
##  $ carheight            : num  48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
##  $ curbweight           : int  2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
##  $ enginesize           : int  130 130 152 109 136 136 136 136 131 131 ...
##  $ boreratio            : num  3.47 3.47 2.68 3.19 3.19 3.19 3.19 3.19 3.13 3.13 ...
##  $ stroke               : num  2.68 2.68 3.47 3.4 3.4 3.4 3.4 3.4 3.4 3.4 ...
##  $ compressionratio     : num  9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
##  $ horsepower           : int  111 111 154 102 115 110 110 110 140 160 ...
##  $ peakrpm              : int  5000 5000 5000 5500 5500 5500 5500 5500 5500 5500 ...
##  $ citympg              : int  21 21 19 24 18 19 19 19 17 16 ...
##  $ highwaympg           : int  27 27 26 30 22 25 25 25 20 22 ...
##  $ price                : num  13495 16500 16500 13950 17450 ...
##  $ drivewheelfwd        : num  0 0 0 1 0 1 1 1 1 0 ...
##  $ drivewheelrwd        : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ carbodyhardtop       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carbodyhatchback     : num  0 0 1 0 0 0 0 0 0 1 ...
##  $ carbodysedan         : num  0 0 0 1 1 1 1 0 1 0 ...
##  $ carbodywagon         : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ enginetypedohcv      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enginetypel          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enginetypeohc        : num  0 0 0 1 1 1 1 1 1 1 ...
##  $ enginetypeohcf       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enginetypeohcv       : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ enginetyperotor      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cylindernumberfive   : num  0 0 0 0 1 1 1 1 1 1 ...
##  $ cylindernumberfour   : num  1 1 0 1 0 0 0 0 0 0 ...
##  $ cylindernumbersix    : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ cylindernumberthree  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cylindernumbertwelve : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cylindernumbertwo    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystem2bbl       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystem4bbl       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystemidi        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystemmfi        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystemmpfi       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ fuelsystemspdi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fuelsystemspfi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameAUDI      : num  0 0 0 1 1 1 1 1 1 1 ...
##  $ companynameBMW       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameBUICK     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameCHEVROLET : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameDODGE     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameHONDA     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameISUZU     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameJAGUAR    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameMAZDA     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameMERCURY   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameMITSUBISHI: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameNISSAN    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynamePEUGEOT   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynamePLYMOUTH  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynamePORSCHE   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameRENAULT   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameSAAB      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameSUBARU    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameTOYOTA    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameVOLKSWAGEN: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ companynameVOLVO     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ symboling-1          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ symboling0           : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ symboling1           : num  0 0 1 0 0 0 1 1 1 0 ...
##  $ symboling2           : num  0 0 0 1 1 1 0 0 0 0 ...
##  $ symboling3           : num  1 1 0 0 0 0 0 0 0 0 ...

Menghitung jumlah nilai yang kosong

# Count the missing (NA) values over all cells of the encoded data frame.
sum(is.na(carmodel))
## [1] 0

Mengatur data training dan testing

# Reproducible random split: about 70 % of the rows for training, the rest
# for testing.
set.seed(12345)
n_rows <- nrow(carmodel)
# seq_len() stays empty for a zero-row data frame (1:0 would yield c(1, 0)),
# and floor() makes the truncation of the fractional sample size explicit.
trainindices <- sample(seq_len(n_rows), floor(0.7 * n_rows))
train <- carmodel[trainindices, ]
# setdiff() keeps every non-training row in ascending order and, unlike a
# negative index, still returns all rows when trainindices is empty.
test <- carmodel[setdiff(seq_len(n_rows), trainindices), ]

Mencoba membuat model untuk melihat variabel yang berpengaruh

# Baseline model: regress price on every other column of the training data.
# NOTE(review): the `.` shorthand also includes car_ID, which looks like a
# row identifier rather than a car attribute - confirm that it is meant to be
# a predictor.
model_1 <- lm(price~.,data = train)
# Print coefficient estimates, their significance tests and the fit statistics.
summary(model_1)
## 
## Call:
## lm(formula = price ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2512.1  -673.1     0.0   653.2  3035.6 
## 
## Coefficients: (5 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -2.188e+04  2.071e+04  -1.057 0.293903    
## car_ID                 1.569e+02  6.634e+01   2.364 0.020547 *  
## fueltype               1.323e+04  7.902e+03   1.674 0.098197 .  
## aspiration            -2.213e+03  1.264e+03  -1.751 0.083908 .  
## doornumber             2.048e+02  6.150e+02   0.333 0.740059    
## enginelocation        -3.613e+04  1.073e+04  -3.368 0.001179 ** 
## wheelbase             -3.203e+01  1.333e+02  -0.240 0.810767    
## carlength              1.161e+01  6.664e+01   0.174 0.862153    
## carwidth               7.942e+02  2.710e+02   2.930 0.004442 ** 
## carheight              2.455e+02  1.778e+02   1.380 0.171420    
## curbweight             4.665e+00  1.772e+00   2.632 0.010215 *  
## enginesize             7.670e+01  3.110e+01   2.466 0.015858 *  
## boreratio             -2.709e+03  1.970e+03  -1.375 0.173061    
## stroke                -1.076e+03  1.143e+03  -0.941 0.349410    
## compressionratio      -1.026e+03  5.817e+02  -1.763 0.081751 .  
## horsepower            -1.407e+01  3.029e+01  -0.465 0.643509    
## peakrpm                1.756e+00  8.101e-01   2.167 0.033278 *  
## citympg                1.371e+02  1.760e+02   0.779 0.438462    
## highwaympg             4.996e+01  1.678e+02   0.298 0.766758    
## drivewheelfwd          6.152e+02  9.472e+02   0.650 0.517883    
## drivewheelrwd         -3.218e+02  1.333e+03  -0.241 0.809851    
## carbodyhardtop        -2.439e+03  1.155e+03  -2.112 0.037899 *  
## carbodyhatchback      -2.773e+03  1.101e+03  -2.519 0.013800 *  
## carbodysedan          -3.644e+03  1.244e+03  -2.930 0.004444 ** 
## carbodywagon          -4.512e+03  1.364e+03  -3.309 0.001417 ** 
## enginetypedohcv       -3.529e+03  5.088e+03  -0.694 0.490020    
## enginetypel           -2.297e+04  7.896e+03  -2.910 0.004714 ** 
## enginetypeohc         -1.636e+02  1.424e+03  -0.115 0.908826    
## enginetypeohcf        -2.701e+04  1.033e+04  -2.614 0.010722 *  
## enginetypeohcv        -2.547e+03  1.340e+03  -1.901 0.060944 .  
## enginetyperotor        1.016e+04  5.377e+03   1.889 0.062632 .  
## cylindernumberfive    -4.201e+03  3.409e+03  -1.232 0.221474    
## cylindernumberfour    -2.977e+03  4.205e+03  -0.708 0.481063    
## cylindernumbersix     -2.560e+03  3.378e+03  -0.758 0.450904    
## cylindernumberthree    2.251e+04  8.897e+03   2.530 0.013406 *  
## cylindernumbertwelve   4.266e+02  5.535e+03   0.077 0.938761    
## cylindernumbertwo             NA         NA      NA       NA    
## fuelsystem2bbl         2.300e+03  1.505e+03   1.528 0.130524    
## fuelsystem4bbl        -2.155e+02  2.460e+03  -0.088 0.930408    
## fuelsystemidi                 NA         NA      NA       NA    
## fuelsystemmfi          9.559e+02  2.655e+03   0.360 0.719765    
## fuelsystemmpfi         1.749e+03  1.628e+03   1.074 0.285946    
## fuelsystemspdi         1.058e+03  1.870e+03   0.566 0.573245    
## fuelsystemspfi                NA         NA      NA       NA    
## companynameAUDI       -4.301e+03  2.805e+03  -1.533 0.129222    
## companynameBMW         3.169e+03  2.663e+03   1.190 0.237616    
## companynameBUICK      -5.803e+03  4.928e+03  -1.178 0.242533    
## companynameCHEVROLET  -8.571e+03  3.341e+03  -2.566 0.012220 *  
## companynameDODGE      -1.036e+04  2.913e+03  -3.554 0.000647 ***
## companynameHONDA      -9.615e+03  3.684e+03  -2.610 0.010851 *  
## companynameISUZU      -1.024e+04  4.195e+03  -2.442 0.016879 *  
## companynameJAGUAR     -4.234e+03  3.791e+03  -1.117 0.267500    
## companynameMAZDA      -1.563e+04  4.831e+03  -3.235 0.001783 ** 
## companynameMERCURY    -1.590e+04  5.885e+03  -2.701 0.008478 ** 
## companynameMITSUBISHI -1.917e+04  6.176e+03  -3.104 0.002662 ** 
## companynameNISSAN     -2.013e+04  7.223e+03  -2.787 0.006687 ** 
## companynamePEUGEOT            NA         NA      NA       NA    
## companynamePLYMOUTH   -2.481e+04  8.678e+03  -2.859 0.005449 ** 
## companynamePORSCHE    -1.347e+04  8.784e+03  -1.533 0.129200    
## companynameRENAULT    -2.650e+04  9.070e+03  -2.922 0.004549 ** 
## companynameSAAB       -2.419e+04  9.471e+03  -2.554 0.012587 *  
## companynameSUBARU             NA         NA      NA       NA    
## companynameTOYOTA     -3.040e+04  1.137e+04  -2.674 0.009117 ** 
## companynameVOLKSWAGEN -3.382e+04  1.307e+04  -2.588 0.011523 *  
## companynameVOLVO      -3.038e+04  1.291e+04  -2.353 0.021169 *  
## `symboling-1`          1.965e+03  1.424e+03   1.380 0.171596    
## symboling0             3.400e+03  1.762e+03   1.929 0.057326 .  
## symboling1             4.117e+03  1.811e+03   2.273 0.025756 *  
## symboling2             3.438e+03  1.826e+03   1.883 0.063409 .  
## symboling3             2.571e+03  1.908e+03   1.347 0.181725    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1542 on 78 degrees of freedom
## Multiple R-squared:  0.9805, Adjusted R-squared:  0.9645 
## F-statistic: 61.28 on 64 and 78 DF,  p-value: < 2.2e-16
#step <- stepAIC(model_1,direction = "both")
#summary(step)
#vif(step)  # used to decide which variables to remove

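Membuat model kedua dengan menghapus variabel fuelsystemmpfi beserta variabel lain yang tidak signifikan (p-value > 0.05) atau tidak terdefinisi (NA) pada model pertama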

# Second model: price regressed on an explicitly listed subset of the
# predictors, fitted on the training data.
# NOTE(review): car_ID is part of this formula; it looks like a row
# identifier rather than a car attribute - confirm that it belongs here.
model_2 <-lm(formula = price ~ car_ID + fueltype + aspiration + enginelocation + 
               carwidth + carheight + curbweight + enginesize + boreratio + 
               stroke + compressionratio + peakrpm + citympg + carbodyhardtop + 
               carbodyhatchback + carbodysedan + carbodywagon + enginetypel + 
               enginetypeohcf + enginetypeohcv + enginetyperotor + cylindernumberfive + 
               cylindernumberthree + fuelsystem2bbl  + companynameBMW + 
               companynameCHEVROLET + companynameDODGE + companynameHONDA + 
               companynameISUZU + companynameMAZDA + companynameMERCURY + 
               companynameMITSUBISHI + companynameNISSAN + companynamePLYMOUTH + 
               companynamePORSCHE + companynameRENAULT + companynameSAAB + 
               companynameTOYOTA + companynameVOLKSWAGEN + companynameVOLVO + 
               `symboling-1` + symboling0 + symboling1 + symboling2 + symboling3, 
             data = train)
# Stepwise selection by AIC in both directions, starting from model_2; the
# search trace is printed and the selected model is stored in step2.
step2 <- stepAIC(model_2,direction = "both")
## Start:  AIC=2118.93
## price ~ car_ID + fueltype + aspiration + enginelocation + carwidth + 
##     carheight + curbweight + enginesize + boreratio + stroke + 
##     compressionratio + peakrpm + citympg + carbodyhardtop + carbodyhatchback + 
##     carbodysedan + carbodywagon + enginetypel + enginetypeohcf + 
##     enginetypeohcv + enginetyperotor + cylindernumberfive + cylindernumberthree + 
##     fuelsystem2bbl + companynameBMW + companynameCHEVROLET + 
##     companynameDODGE + companynameHONDA + companynameISUZU + 
##     companynameMAZDA + companynameMERCURY + companynameMITSUBISHI + 
##     companynameNISSAN + companynamePLYMOUTH + companynamePORSCHE + 
##     companynameRENAULT + companynameSAAB + companynameTOYOTA + 
##     companynameVOLKSWAGEN + companynameVOLVO + `symboling-1` + 
##     symboling0 + symboling1 + symboling2 + symboling3
## 
##                         Df Sum of Sq       RSS    AIC
## <none>                               204720833 2118.9
## - symboling3             1   3783839 208504672 2119.6
## - stroke                 1   4288290 209009122 2119.9
## - `symboling-1`          1   5490039 210210872 2120.7
## - carheight              1   5640068 210360901 2120.8
## - fuelsystem2bbl         1   6055432 210776264 2121.1
## - enginetypeohcv         1   6687636 211408468 2121.5
## - symboling2             1   6907444 211628276 2121.7
## - fueltype               1   8638298 213359131 2122.8
## - symboling0             1   9032729 213753562 2123.1
## - citympg                1  10023715 214744547 2123.8
## - symboling1             1  11490574 216211407 2124.7
## - compressionratio       1  12050667 216771499 2125.1
## - aspiration             1  13193728 217914561 2125.9
## - peakrpm                1  14896652 219617485 2127.0
## - cylindernumberfive     1  15934994 220655827 2127.7
## - carbodyhardtop         1  16699249 221420082 2128.1
## - boreratio              1  17155817 221876650 2128.4
## - companynamePORSCHE     1  19736609 224457441 2130.1
## - curbweight             1  20674401 225395234 2130.7
## - companynameCHEVROLET   1  22336176 227057009 2131.7
## - carbodyhatchback       1  32746549 237467381 2138.2
## - carbodysedan           1  34673500 239394333 2139.3
## - carbodywagon           1  41524173 246245006 2143.3
## - companynameBMW         1  41564228 246285060 2143.4
## - companynameISUZU       1  44077706 248798538 2144.8
## - carwidth               1  51150832 255871665 2148.8
## - companynameMERCURY     1  54504128 259224960 2150.7
## - enginesize             1  62111640 266832473 2154.8
## - companynameHONDA       1  63450033 268170866 2155.5
## - car_ID                 1  64438608 269159441 2156.1
## - companynameSAAB        1  64613388 269334221 2156.2
## - companynameVOLVO       1  74868803 279589636 2161.5
## - companynameVOLKSWAGEN  1  75457322 280178155 2161.8
## - companynameDODGE       1  82010457 286731289 2165.1
## - companynameRENAULT     1  83038614 287759447 2165.6
## - enginetypeohcf         1  84939001 289659833 2166.6
## - cylindernumberthree    1  91875876 296596709 2169.9
## - companynameTOYOTA      1  95164070 299884902 2171.5
## - enginetyperotor        1 106382816 311103649 2176.8
## - companynameNISSAN      1 110528783 315249616 2178.7
## - companynamePLYMOUTH    1 111238939 315959772 2179.0
## - enginetypel            1 116350650 321071483 2181.3
## - enginelocation         1 122214869 326935701 2183.9
## - companynameMAZDA       1 129078970 333799803 2186.8
## - companynameMITSUBISHI  1 129730013 334450846 2187.1
#summary(step2)
# Print the coefficient table and fit statistics of model_2.
summary(model_2)
## 
## Call:
## lm(formula = price ~ car_ID + fueltype + aspiration + enginelocation + 
##     carwidth + carheight + curbweight + enginesize + boreratio + 
##     stroke + compressionratio + peakrpm + citympg + carbodyhardtop + 
##     carbodyhatchback + carbodysedan + carbodywagon + enginetypel + 
##     enginetypeohcf + enginetypeohcv + enginetyperotor + cylindernumberfive + 
##     cylindernumberthree + fuelsystem2bbl + companynameBMW + companynameCHEVROLET + 
##     companynameDODGE + companynameHONDA + companynameISUZU + 
##     companynameMAZDA + companynameMERCURY + companynameMITSUBISHI + 
##     companynameNISSAN + companynamePLYMOUTH + companynamePORSCHE + 
##     companynameRENAULT + companynameSAAB + companynameTOYOTA + 
##     companynameVOLKSWAGEN + companynameVOLVO + `symboling-1` + 
##     symboling0 + symboling1 + symboling2 + symboling3, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2459.5  -756.8     0.0   722.9  3535.9 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -2.667e+04  1.326e+04  -2.012 0.047002 *  
## car_ID                 1.146e+02  2.074e+01   5.526 2.76e-07 ***
## fueltype               1.129e+04  5.581e+03   2.023 0.045814 *  
## aspiration            -1.596e+03  6.385e+02  -2.500 0.014087 *  
## enginelocation        -2.894e+04  3.803e+03  -7.610 1.79e-11 ***
## carwidth               8.379e+02  1.702e+02   4.923 3.49e-06 ***
## carheight              1.811e+02  1.108e+02   1.635 0.105346    
## curbweight             3.644e+00  1.164e+00   3.130 0.002311 ** 
## enginesize             7.203e+01  1.328e+01   5.425 4.26e-07 ***
## boreratio             -2.919e+03  1.024e+03  -2.851 0.005323 ** 
## stroke                -1.190e+03  8.346e+02  -1.425 0.157242    
## compressionratio      -9.442e+02  3.951e+02  -2.390 0.018804 *  
## peakrpm                1.386e+00  5.217e-01   2.657 0.009226 ** 
## citympg                1.403e+02  6.439e+01   2.179 0.031730 *  
## carbodyhardtop        -2.807e+03  9.979e+02  -2.813 0.005944 ** 
## carbodyhatchback      -3.468e+03  8.803e+02  -3.939 0.000154 ***
## carbodysedan          -3.833e+03  9.456e+02  -4.053 0.000102 ***
## carbodywagon          -4.682e+03  1.055e+03  -4.436 2.42e-05 ***
## enginetypel           -1.679e+04  2.262e+03  -7.425 4.38e-11 ***
## enginetypeohcf        -1.924e+04  3.033e+03  -6.344 7.14e-09 ***
## enginetypeohcv        -1.617e+03  9.085e+02  -1.780 0.078193 .  
## enginetyperotor        1.045e+04  1.471e+03   7.100 2.08e-10 ***
## cylindernumberfive    -3.047e+03  1.109e+03  -2.748 0.007155 ** 
## cylindernumberthree    2.033e+04  3.082e+03   6.598 2.21e-09 ***
## fuelsystem2bbl         8.587e+02  5.070e+02   1.694 0.093501 .  
## companynameBMW         4.772e+03  1.075e+03   4.438 2.40e-05 ***
## companynameCHEVROLET  -6.077e+03  1.868e+03  -3.253 0.001571 ** 
## companynameDODGE      -7.329e+03  1.176e+03  -6.234 1.18e-08 ***
## companynameHONDA      -7.242e+03  1.321e+03  -5.483 3.32e-07 ***
## companynameISUZU      -7.221e+03  1.580e+03  -4.570 1.44e-05 ***
## companynameMAZDA      -1.116e+04  1.427e+03  -7.820 6.45e-12 ***
## companynameMERCURY    -1.155e+04  2.273e+03  -5.082 1.82e-06 ***
## companynameMITSUBISHI -1.386e+04  1.767e+03  -7.840 5.86e-12 ***
## companynameNISSAN     -1.436e+04  1.984e+03  -7.237 1.08e-10 ***
## companynamePLYMOUTH   -1.815e+04  2.500e+03  -7.260 9.67e-11 ***
## companynamePORSCHE    -7.995e+03  2.615e+03  -3.058 0.002880 ** 
## companynameRENAULT    -1.847e+04  2.945e+03  -6.273 9.90e-09 ***
## companynameSAAB       -1.613e+04  2.916e+03  -5.533 2.67e-07 ***
## companynameTOYOTA     -2.194e+04  3.268e+03  -6.715 1.28e-09 ***
## companynameVOLKSWAGEN -2.349e+04  3.928e+03  -5.979 3.73e-08 ***
## companynameVOLVO      -2.126e+04  3.569e+03  -5.956 4.14e-08 ***
## `symboling-1`          2.044e+03  1.267e+03   1.613 0.110027    
## symboling0             3.072e+03  1.485e+03   2.069 0.041227 *  
## symboling1             3.535e+03  1.515e+03   2.333 0.021696 *  
## symboling2             2.757e+03  1.524e+03   1.809 0.073532 .  
## symboling3             2.126e+03  1.588e+03   1.339 0.183710    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1453 on 97 degrees of freedom
## Multiple R-squared:  0.9785, Adjusted R-squared:  0.9685 
## F-statistic: 97.98 on 45 and 97 DF,  p-value: < 2.2e-16
#vif(model_2)

Menghapus beberapa variabel yang memiliki nilai p-value lebih dari 0.05 hingga mendapatkan model terbaik dengan nilai AIC terkecil

# Reduced model: price explained by engine location, car width, engine size,
# and the rotor-engine and BMW indicator columns of `train`.
model_3 <- lm(
  formula = price ~ enginelocation + carwidth + enginesize +
    enginetyperotor + companynameBMW,
  data = train
)
# Coefficient table, residual summary and fit statistics for the reduced model.
summary(model_3)
## 
## Call:
## lm(formula = price ~ enginelocation + carwidth + enginesize + 
##     enginetyperotor + companynameBMW, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5574.0 -1768.7    35.4  1460.3  6817.3 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -69020.179   8883.196  -7.770 1.67e-12 ***
## enginelocation  -16245.699   1687.905  -9.625  < 2e-16 ***
## carwidth          1280.438    151.935   8.428 4.31e-14 ***
## enginesize         104.741      8.115  12.907  < 2e-16 ***
## enginetyperotor   7171.787   1610.400   4.453 1.74e-05 ***
## companynameBMW    7348.186   1126.801   6.521 1.24e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2665 on 137 degrees of freedom
## Multiple R-squared:  0.8977, Adjusted R-squared:  0.894 
## F-statistic: 240.5 on 5 and 137 DF,  p-value: < 2.2e-16

Melihat kesesuaian model

library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 3.5.3
## ResourceSelection 0.3-5   2019-07-22
# Goodness-of-fit check: Hosmer-Lemeshow test on the observed training prices
# versus model_3's fitted values, using g = 10 groups.
# NOTE(review): hoslem.test() is documented for binary (0/1) outcomes paired
# with fitted probabilities. Here it receives a continuous price and lm()
# fitted values, and the reported X-squared is negative, so the p-value should
# not be read as evidence of good fit. Consider residual diagnostics for the
# linear model instead -- TODO confirm and replace.
hoslem.test(train$price, fitted(model_3), g=10)
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  train$price, fitted(model_3)
## X-squared = -2.0765, df = 8, p-value = 1

Prediksi harga

# Predict prices for the hold-out rows. predict() only looks up the predictor
# columns named in model_3's formula, so `test` can be passed as-is; the
# response column does not need to be removed first. (Dropping it with
# `-which(colnames(test) == 'price')` would select zero columns if 'price'
# were ever missing, because `-integer(0)` is an empty index.)
prediksi_harga <- predict(model_3, newdata = test)

# Store the predictions next to the actual prices for later comparison.
test$prediksi_harga <- prediksi_harga

# Show the first six rows, including the new prediksi_harga column.
head(test)
##    car_ID fueltype aspiration doornumber enginelocation wheelbase
## 4       4        0          1          1              1      99.8
## 5       5        0          1          1              1      99.4
## 6       6        0          1          0              1      99.8
## 16     16        0          1          1              1     103.5
## 17     17        0          1          0              1     103.5
## 20     20        0          1          0              1      94.5
##    carlength carwidth carheight curbweight enginesize boreratio stroke
## 4      176.6     66.2      54.3       2337        109      3.19   3.40
## 5      176.6     66.4      54.3       2824        136      3.19   3.40
## 6      177.3     66.3      53.1       2507        136      3.19   3.40
## 16     189.0     66.9      55.7       3230        209      3.62   3.39
## 17     193.8     67.9      53.7       3380        209      3.62   3.39
## 20     155.9     63.6      52.0       1874         90      3.03   3.11
##    compressionratio horsepower peakrpm citympg highwaympg price
## 4              10.0        102    5500      24         30 13950
## 5               8.0        115    5500      18         22 17450
## 6               8.5        110    5500      19         25 15250
## 16              8.0        182    5400      16         22 30760
## 17              8.0        182    5400      16         22 41315
## 20              9.6         70    5400      38         43  6295
##    drivewheelfwd drivewheelrwd carbodyhardtop carbodyhatchback
## 4              1             0              0                0
## 5              0             0              0                0
## 6              1             0              0                0
## 16             0             1              0                0
## 17             0             1              0                0
## 20             1             0              0                1
##    carbodysedan carbodywagon enginetypedohcv enginetypel enginetypeohc
## 4             1            0               0           0             1
## 5             1            0               0           0             1
## 6             1            0               0           0             1
## 16            1            0               0           0             1
## 17            1            0               0           0             1
## 20            0            0               0           0             1
##    enginetypeohcf enginetypeohcv enginetyperotor cylindernumberfive
## 4               0              0               0                  0
## 5               0              0               0                  1
## 6               0              0               0                  1
## 16              0              0               0                  0
## 17              0              0               0                  0
## 20              0              0               0                  0
##    cylindernumberfour cylindernumbersix cylindernumberthree
## 4                   1                 0                   0
## 5                   0                 0                   0
## 6                   0                 0                   0
## 16                  0                 1                   0
## 17                  0                 1                   0
## 20                  1                 0                   0
##    cylindernumbertwelve cylindernumbertwo fuelsystem2bbl fuelsystem4bbl
## 4                     0                 0              0              0
## 5                     0                 0              0              0
## 6                     0                 0              0              0
## 16                    0                 0              0              0
## 17                    0                 0              0              0
## 20                    0                 0              1              0
##    fuelsystemidi fuelsystemmfi fuelsystemmpfi fuelsystemspdi
## 4              0             0              1              0
## 5              0             0              1              0
## 6              0             0              1              0
## 16             0             0              1              0
## 17             0             0              1              0
## 20             0             0              0              0
##    fuelsystemspfi companynameAUDI companynameBMW companynameBUICK
## 4               0               1              0                0
## 5               0               1              0                0
## 6               0               1              0                0
## 16              0               0              1                0
## 17              0               0              1                0
## 20              0               0              0                0
##    companynameCHEVROLET companynameDODGE companynameHONDA companynameISUZU
## 4                     0                0                0                0
## 5                     0                0                0                0
## 6                     0                0                0                0
## 16                    0                0                0                0
## 17                    0                0                0                0
## 20                    1                0                0                0
##    companynameJAGUAR companynameMAZDA companynameMERCURY
## 4                  0                0                  0
## 5                  0                0                  0
## 6                  0                0                  0
## 16                 0                0                  0
## 17                 0                0                  0
## 20                 0                0                  0
##    companynameMITSUBISHI companynameNISSAN companynamePEUGEOT
## 4                      0                 0                  0
## 5                      0                 0                  0
## 6                      0                 0                  0
## 16                     0                 0                  0
## 17                     0                 0                  0
## 20                     0                 0                  0
##    companynamePLYMOUTH companynamePORSCHE companynameRENAULT
## 4                    0                  0                  0
## 5                    0                  0                  0
## 6                    0                  0                  0
## 16                   0                  0                  0
## 17                   0                  0                  0
## 20                   0                  0                  0
##    companynameSAAB companynameSUBARU companynameTOYOTA
## 4                0                 0                 0
## 5                0                 0                 0
## 6                0                 0                 0
## 16               0                 0                 0
## 17               0                 0                 0
## 20               0                 0                 0
##    companynameVOLKSWAGEN companynameVOLVO symboling-1 symboling0
## 4                      0                0           0          0
## 5                      0                0           0          0
## 6                      0                0           0          0
## 16                     0                0           0          1
## 17                     0                0           0          1
## 20                     0                0           0          0
##    symboling1 symboling2 symboling3 prediksi_harga
## 4           0          1          0      10915.855
## 5           0          1          0      13999.945
## 6           0          1          0      13871.901
## 16          0          0          0      29634.431
## 17          0          0          0      30914.869
## 20          1          0          0       5596.641

Melihat korelasi hasil prediksi dengan data asli

# Correlation (cor() default method) between actual and predicted prices on
# the test set.
r <- with(test, cor(price, prediksi_harga))
r # 0.9456267
## [1] 0.9456267

Melihat nilai MAPE

library(forecast)
## Warning: package 'forecast' was built under R version 3.5.3
# Accuracy summary on the test set (prints ME, RMSE, MAE, MPE and MAPE).
# The first argument is the vector of predicted prices, the second the vector
# of actual prices.
accuracy(prediksi_harga,test$price)
##                ME     RMSE      MAE      MPE     MAPE
## Test set 644.5205 2612.701 1953.348 1.884702 15.65352

Kesimpulan: Berdasarkan hasil analisis, model regresi linier berganda memiliki nilai error MAPE sebesar 15,65% yang termasuk kriteria BAIK, dan korelasi antara data asli dengan hasil prediksi pada data testing sebesar 0.9456267.