library(glmnet)
## Warning: package 'glmnet' was built under R version 4.3.1
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(lars)
## Loaded lars 1.3
library(car)
## Loading required package: carData
library(corrplot)
## corrplot 0.92 loaded
library(Matrix)
library(plotmo)
## Warning: package 'plotmo' was built under R version 4.3.1
## Loading required package: Formula
## Loading required package: plotrix
## Loading required package: TeachingDemos
## Warning: package 'TeachingDemos' was built under R version 4.3.1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.3.1
## Loading required package: ggplot2
## Loading required package: lattice
library(repr)
## Warning: package 'repr' was built under R version 4.3.1
library(cowplot)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(knitr)
library(mlr3verse)
## Warning: package 'mlr3verse' was built under R version 4.3.1
## Loading required package: mlr3
## Warning: package 'mlr3' was built under R version 4.3.1
library(mlr3fselect)
## Warning: package 'mlr3fselect' was built under R version 4.3.1
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.3.1
library(qgraph)
## Warning: package 'qgraph' was built under R version 4.3.1
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ purrr::lift() masks caret::lift()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(glmnetUtils)
## Warning: package 'glmnetUtils' was built under R version 4.3.1
##
## Attaching package: 'glmnetUtils'
##
## The following objects are masked from 'package:glmnet':
##
## cv.glmnet, glmnet
library(leaps)
library(varbvs)
## Warning: package 'varbvs' was built under R version 4.3.1
library(ggplot2)
library(tidyverse)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following object is masked from 'package:purrr':
##
## compact
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(readr)
library(skimr)
## Warning: package 'skimr' was built under R version 4.3.1
##
## Attaching package: 'skimr'
##
## The following object is masked from 'package:mlr3':
##
## partition
library(leaps)
library(FactoMineR)
library(rio)
## Warning: package 'rio' was built under R version 4.3.1
library(imputeTS)
## Warning: package 'imputeTS' was built under R version 4.3.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'imputeTS'
##
## The following object is masked from 'package:zoo':
##
## na.locf
##
## The following object is masked from 'package:glmnet':
##
## na.replace
# Attach glmnet and lmridge by name. `[[1]]` keeps only the value returned by
# the first library() call, i.e. the vector of attached packages printed below.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
lapply(c("glmnet", "lmridge"), library, character.only = TRUE)[[1]]
##
## Attaching package: 'lmridge'
## The following object is masked from 'package:car':
##
## vif
## [1] "imputeTS" "rio" "FactoMineR"
## [4] "skimr" "plyr" "varbvs"
## [7] "leaps" "glmnetUtils" "broom"
## [10] "lubridate" "forcats" "stringr"
## [13] "purrr" "readr" "tidyr"
## [16] "tibble" "tidyverse" "PerformanceAnalytics"
## [19] "xts" "zoo" "qgraph"
## [22] "DataExplorer" "mlr3fselect" "mlr3verse"
## [25] "mlr3" "knitr" "factoextra"
## [28] "cowplot" "repr" "caret"
## [31] "lattice" "ggplot2" "dplyr"
## [34] "plotmo" "TeachingDemos" "plotrix"
## [37] "Formula" "corrplot" "car"
## [40] "carData" "lars" "glmnet"
## [43] "Matrix" "stats" "graphics"
## [46] "grDevices" "utils" "datasets"
## [49] "methods" "base"
Data yang digunakan yaitu dataset Cars93 dari paket MASS. Dataset Cars93 terdiri atas 93 amatan dan 27 peubah, dengan rincian tipe peubah seperti yang ditampilkan pada keluaran str() di bawah. Hanya peubah numerik yang akan digunakan dalam analisis.
# Open the help page for Cars93. The package is named explicitly because MASS
# is never attached with library() in this script, so a bare `?Cars93` only
# works when some other package has already loaded the MASS namespace.
help("Cars93", package = "MASS")
## starting httpd help server ... done
# Load Cars93 from MASS into the workspace. Note that data() returns only the
# name of the dataset ("Cars93"), so `datawal` is a character string, not the
# data frame itself; the data frame is available as `Cars93`.
datawal <- data(Cars93, package = "MASS")

# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is rendered or run in batch.
if (interactive()) {
  View(Cars93)
}

# Compact overview of the column types and first values.
str(Cars93)
## 'data.frame': 93 obs. of 27 variables:
## $ Manufacturer : Factor w/ 32 levels "Acura","Audi",..: 1 1 2 2 3 4 4 4 4 5 ...
## $ Model : Factor w/ 93 levels "100","190E","240",..: 49 56 9 1 6 24 54 74 73 35 ...
## $ Type : Factor w/ 6 levels "Compact","Large",..: 4 3 1 3 3 3 2 2 3 2 ...
## $ Min.Price : num 12.9 29.2 25.9 30.8 23.7 14.2 19.9 22.6 26.3 33 ...
## $ Price : num 15.9 33.9 29.1 37.7 30 15.7 20.8 23.7 26.3 34.7 ...
## $ Max.Price : num 18.8 38.7 32.3 44.6 36.2 17.3 21.7 24.9 26.3 36.3 ...
## $ MPG.city : int 25 18 20 19 22 22 19 16 19 16 ...
## $ MPG.highway : int 31 25 26 26 30 31 28 25 27 25 ...
## $ AirBags : Factor w/ 3 levels "Driver & Passenger",..: 3 1 2 1 2 2 2 2 2 2 ...
## $ DriveTrain : Factor w/ 3 levels "4WD","Front",..: 2 2 2 2 3 2 2 3 2 2 ...
## $ Cylinders : Factor w/ 6 levels "3","4","5","6",..: 2 4 4 4 2 2 4 4 4 5 ...
## $ EngineSize : num 1.8 3.2 2.8 2.8 3.5 2.2 3.8 5.7 3.8 4.9 ...
## $ Horsepower : int 140 200 172 172 208 110 170 180 170 200 ...
## $ RPM : int 6300 5500 5500 5500 5700 5200 4800 4000 4800 4100 ...
## $ Rev.per.mile : int 2890 2335 2280 2535 2545 2565 1570 1320 1690 1510 ...
## $ Man.trans.avail : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 1 1 ...
## $ Fuel.tank.capacity: num 13.2 18 16.9 21.1 21.1 16.4 18 23 18.8 18 ...
## $ Passengers : int 5 5 5 6 4 6 6 6 5 6 ...
## $ Length : int 177 195 180 193 186 189 200 216 198 206 ...
## $ Wheelbase : int 102 115 102 106 109 105 111 116 108 114 ...
## $ Width : int 68 71 67 70 69 69 74 78 73 73 ...
## $ Turn.circle : int 37 38 37 37 39 41 42 45 41 43 ...
## $ Rear.seat.room : num 26.5 30 28 31 27 28 30.5 30.5 26.5 35 ...
## $ Luggage.room : int 11 15 14 17 13 16 17 21 14 18 ...
## $ Weight : int 2705 3560 3375 3405 3640 2880 3470 4105 3495 3620 ...
## $ Origin : Factor w/ 2 levels "USA","non-USA": 2 2 2 2 2 1 1 1 1 1 ...
## $ Make : Factor w/ 93 levels "Acura Integra",..: 1 2 4 3 5 6 7 9 8 10 ...
Penggunaan regresi Ridge dan Lasso mengharuskan data yang digunakan minimal bertipe rasio/numerik. Namun, berdasarkan output str() di atas, terdapat beberapa peubah yang masih bertipe faktor (Factor), misalnya Manufacturer, Type, dan AirBags. Oleh karena itu, akan dilakukan pre-processing data untuk memilih peubah yang digunakan. Peubah penjelas yang digunakan berjumlah 15, dengan Price sebagai peubah respons.
# Read the Cars93 table as CSV from the Rdatasets GitHub mirror with import()
# (presumably rio::import, since rio is attached above), then print it in full.
# The printed result carries an extra leading index column named V1.
data_psd <- import(
  file = "https://raw.githubusercontent.com/dnchari/Rdatasets/master/csv/MASS/Cars93.csv"
)
data_psd
## V1 Manufacturer Model Type Min.Price Price Max.Price MPG.city
## 1 1 Acura Integra Small 12.9 15.9 18.8 25
## 2 2 Acura Legend Midsize 29.2 33.9 38.7 18
## 3 3 Audi 90 Compact 25.9 29.1 32.3 20
## 4 4 Audi 100 Midsize 30.8 37.7 44.6 19
## 5 5 BMW 535i Midsize 23.7 30.0 36.2 22
## 6 6 Buick Century Midsize 14.2 15.7 17.3 22
## 7 7 Buick LeSabre Large 19.9 20.8 21.7 19
## 8 8 Buick Roadmaster Large 22.6 23.7 24.9 16
## 9 9 Buick Riviera Midsize 26.3 26.3 26.3 19
## 10 10 Cadillac DeVille Large 33.0 34.7 36.3 16
## 11 11 Cadillac Seville Midsize 37.5 40.1 42.7 16
## 12 12 Chevrolet Cavalier Compact 8.5 13.4 18.3 25
## 13 13 Chevrolet Corsica Compact 11.4 11.4 11.4 25
## 14 14 Chevrolet Camaro Sporty 13.4 15.1 16.8 19
## 15 15 Chevrolet Lumina Midsize 13.4 15.9 18.4 21
## 16 16 Chevrolet Lumina_APV Van 14.7 16.3 18.0 18
## 17 17 Chevrolet Astro Van 14.7 16.6 18.6 15
## 18 18 Chevrolet Caprice Large 18.0 18.8 19.6 17
## 19 19 Chevrolet Corvette Sporty 34.6 38.0 41.5 17
## 20 20 Chrylser Concorde Large 18.4 18.4 18.4 20
## 21 21 Chrysler LeBaron Compact 14.5 15.8 17.1 23
## 22 22 Chrysler Imperial Large 29.5 29.5 29.5 20
## 23 23 Dodge Colt Small 7.9 9.2 10.6 29
## 24 24 Dodge Shadow Small 8.4 11.3 14.2 23
## 25 25 Dodge Spirit Compact 11.9 13.3 14.7 22
## 26 26 Dodge Caravan Van 13.6 19.0 24.4 17
## 27 27 Dodge Dynasty Midsize 14.8 15.6 16.4 21
## 28 28 Dodge Stealth Sporty 18.5 25.8 33.1 18
## 29 29 Eagle Summit Small 7.9 12.2 16.5 29
## 30 30 Eagle Vision Large 17.5 19.3 21.2 20
## 31 31 Ford Festiva Small 6.9 7.4 7.9 31
## 32 32 Ford Escort Small 8.4 10.1 11.9 23
## 33 33 Ford Tempo Compact 10.4 11.3 12.2 22
## 34 34 Ford Mustang Sporty 10.8 15.9 21.0 22
## 35 35 Ford Probe Sporty 12.8 14.0 15.2 24
## 36 36 Ford Aerostar Van 14.5 19.9 25.3 15
## 37 37 Ford Taurus Midsize 15.6 20.2 24.8 21
## 38 38 Ford Crown_Victoria Large 20.1 20.9 21.7 18
## 39 39 Geo Metro Small 6.7 8.4 10.0 46
## 40 40 Geo Storm Sporty 11.5 12.5 13.5 30
## 41 41 Honda Prelude Sporty 17.0 19.8 22.7 24
## 42 42 Honda Civic Small 8.4 12.1 15.8 42
## 43 43 Honda Accord Compact 13.8 17.5 21.2 24
## 44 44 Hyundai Excel Small 6.8 8.0 9.2 29
## 45 45 Hyundai Elantra Small 9.0 10.0 11.0 22
## 46 46 Hyundai Scoupe Sporty 9.1 10.0 11.0 26
## 47 47 Hyundai Sonata Midsize 12.4 13.9 15.3 20
## 48 48 Infiniti Q45 Midsize 45.4 47.9 50.4 17
## 49 49 Lexus ES300 Midsize 27.5 28.0 28.4 18
## 50 50 Lexus SC300 Midsize 34.7 35.2 35.6 18
## 51 51 Lincoln Continental Midsize 33.3 34.3 35.3 17
## 52 52 Lincoln Town_Car Large 34.4 36.1 37.8 18
## 53 53 Mazda 323 Small 7.4 8.3 9.1 29
## 54 54 Mazda Protege Small 10.9 11.6 12.3 28
## 55 55 Mazda 626 Compact 14.3 16.5 18.7 26
## 56 56 Mazda MPV Van 16.6 19.1 21.7 18
## 57 57 Mazda RX-7 Sporty 32.5 32.5 32.5 17
## 58 58 Mercedes-Benz 190E Compact 29.0 31.9 34.9 20
## 59 59 Mercedes-Benz 300E Midsize 43.8 61.9 80.0 19
## 60 60 Mercury Capri Sporty 13.3 14.1 15.0 23
## 61 61 Mercury Cougar Midsize 14.9 14.9 14.9 19
## 62 62 Mitsubishi Mirage Small 7.7 10.3 12.9 29
## 63 63 Mitsubishi Diamante Midsize 22.4 26.1 29.9 18
## 64 64 Nissan Sentra Small 8.7 11.8 14.9 29
## 65 65 Nissan Altima Compact 13.0 15.7 18.3 24
## 66 66 Nissan Quest Van 16.7 19.1 21.5 17
## 67 67 Nissan Maxima Midsize 21.0 21.5 22.0 21
## 68 68 Oldsmobile Achieva Compact 13.0 13.5 14.0 24
## 69 69 Oldsmobile Cutlass_Ciera Midsize 14.2 16.3 18.4 23
## 70 70 Oldsmobile Silhouette Van 19.5 19.5 19.5 18
## 71 71 Oldsmobile Eighty-Eight Large 19.5 20.7 21.9 19
## 72 72 Plymouth Laser Sporty 11.4 14.4 17.4 23
## 73 73 Pontiac LeMans Small 8.2 9.0 9.9 31
## 74 74 Pontiac Sunbird Compact 9.4 11.1 12.8 23
## 75 75 Pontiac Firebird Sporty 14.0 17.7 21.4 19
## 76 76 Pontiac Grand_Prix Midsize 15.4 18.5 21.6 19
## 77 77 Pontiac Bonneville Large 19.4 24.4 29.4 19
## 78 78 Saab 900 Compact 20.3 28.7 37.1 20
## 79 79 Saturn SL Small 9.2 11.1 12.9 28
## 80 80 Subaru Justy Small 7.3 8.4 9.5 33
## 81 81 Subaru Loyale Small 10.5 10.9 11.3 25
## 82 82 Subaru Legacy Compact 16.3 19.5 22.7 23
## 83 83 Suzuki Swift Small 7.3 8.6 10.0 39
## 84 84 Toyota Tercel Small 7.8 9.8 11.8 32
## 85 85 Toyota Celica Sporty 14.2 18.4 22.6 25
## 86 86 Toyota Camry Midsize 15.2 18.2 21.2 22
## 87 87 Toyota Previa Van 18.9 22.7 26.6 18
## 88 88 Volkswagen Fox Small 8.7 9.1 9.5 25
## 89 89 Volkswagen Eurovan Van 16.6 19.7 22.7 17
## 90 90 Volkswagen Passat Compact 17.6 20.0 22.4 21
## 91 91 Volkswagen Corrado Sporty 22.9 23.3 23.7 18
## 92 92 Volvo 240 Compact 21.8 22.7 23.5 21
## 93 93 Volvo 850 Midsize 24.8 26.7 28.5 20
## MPG.highway AirBags DriveTrain Cylinders EngineSize Horsepower
## 1 31 None Front 4 1.8 140
## 2 25 Driver & Passenger Front 6 3.2 200
## 3 26 Driver only Front 6 2.8 172
## 4 26 Driver & Passenger Front 6 2.8 172
## 5 30 Driver only Rear 4 3.5 208
## 6 31 Driver only Front 4 2.2 110
## 7 28 Driver only Front 6 3.8 170
## 8 25 Driver only Rear 6 5.7 180
## 9 27 Driver only Front 6 3.8 170
## 10 25 Driver only Front 8 4.9 200
## 11 25 Driver & Passenger Front 8 4.6 295
## 12 36 None Front 4 2.2 110
## 13 34 Driver only Front 4 2.2 110
## 14 28 Driver & Passenger Rear 6 3.4 160
## 15 29 None Front 4 2.2 110
## 16 23 None Front 6 3.8 170
## 17 20 None 4WD 6 4.3 165
## 18 26 Driver only Rear 8 5.0 170
## 19 25 Driver only Rear 8 5.7 300
## 20 28 Driver & Passenger Front 6 3.3 153
## 21 28 Driver & Passenger Front 4 3.0 141
## 22 26 Driver only Front 6 3.3 147
## 23 33 None Front 4 1.5 92
## 24 29 Driver only Front 4 2.2 93
## 25 27 Driver only Front 4 2.5 100
## 26 21 Driver only 4WD 6 3.0 142
## 27 27 Driver only Front 4 2.5 100
## 28 24 Driver only 4WD 6 3.0 300
## 29 33 None Front 4 1.5 92
## 30 28 Driver & Passenger Front 6 3.5 214
## 31 33 None Front 4 1.3 63
## 32 30 None Front 4 1.8 127
## 33 27 None Front 4 2.3 96
## 34 29 Driver only Rear 4 2.3 105
## 35 30 Driver only Front 4 2.0 115
## 36 20 Driver only 4WD 6 3.0 145
## 37 30 Driver only Front 6 3.0 140
## 38 26 Driver only Rear 8 4.6 190
## 39 50 None Front 3 1.0 55
## 40 36 Driver only Front 4 1.6 90
## 41 31 Driver & Passenger Front 4 2.3 160
## 42 46 Driver only Front 4 1.5 102
## 43 31 Driver & Passenger Front 4 2.2 140
## 44 33 None Front 4 1.5 81
## 45 29 None Front 4 1.8 124
## 46 34 None Front 4 1.5 92
## 47 27 None Front 4 2.0 128
## 48 22 Driver only Rear 8 4.5 278
## 49 24 Driver only Front 6 3.0 185
## 50 23 Driver & Passenger Rear 6 3.0 225
## 51 26 Driver & Passenger Front 6 3.8 160
## 52 26 Driver & Passenger Rear 8 4.6 210
## 53 37 None Front 4 1.6 82
## 54 36 None Front 4 1.8 103
## 55 34 Driver only Front 4 2.5 164
## 56 24 None 4WD 6 3.0 155
## 57 25 Driver only Rear rotary 1.3 255
## 58 29 Driver only Rear 4 2.3 130
## 59 25 Driver & Passenger Rear 6 3.2 217
## 60 26 Driver only Front 4 1.6 100
## 61 26 None Rear 6 3.8 140
## 62 33 None Front 4 1.5 92
## 63 24 Driver only Front 6 3.0 202
## 64 33 Driver only Front 4 1.6 110
## 65 30 Driver only Front 4 2.4 150
## 66 23 None Front 6 3.0 151
## 67 26 Driver only Front 6 3.0 160
## 68 31 None Front 4 2.3 155
## 69 31 Driver only Front 4 2.2 110
## 70 23 None Front 6 3.8 170
## 71 28 Driver only Front 6 3.8 170
## 72 30 None 4WD 4 1.8 92
## 73 41 None Front 4 1.6 74
## 74 31 None Front 4 2.0 110
## 75 28 Driver & Passenger Rear 6 3.4 160
## 76 27 None Front 6 3.4 200
## 77 28 Driver & Passenger Front 6 3.8 170
## 78 26 Driver only Front 4 2.1 140
## 79 38 Driver only Front 4 1.9 85
## 80 37 None 4WD 3 1.2 73
## 81 30 None 4WD 4 1.8 90
## 82 30 Driver only 4WD 4 2.2 130
## 83 43 None Front 3 1.3 70
## 84 37 Driver only Front 4 1.5 82
## 85 32 Driver only Front 4 2.2 135
## 86 29 Driver only Front 4 2.2 130
## 87 22 Driver only 4WD 4 2.4 138
## 88 33 None Front 4 1.8 81
## 89 21 None Front 5 2.5 109
## 90 30 None Front 4 2.0 134
## 91 25 None Front 6 2.8 178
## 92 28 Driver only Rear 4 2.3 114
## 93 28 Driver & Passenger Front 5 2.4 168
## RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length
## 1 6300 2890 Yes 13.2 5 177
## 2 5500 2335 Yes 18.0 5 195
## 3 5500 2280 Yes 16.9 5 180
## 4 5500 2535 Yes 21.1 6 193
## 5 5700 2545 Yes 21.1 4 186
## 6 5200 2565 No 16.4 6 189
## 7 4800 1570 No 18.0 6 200
## 8 4000 1320 No 23.0 6 216
## 9 4800 1690 No 18.8 5 198
## 10 4100 1510 No 18.0 6 206
## 11 6000 1985 No 20.0 5 204
## 12 5200 2380 Yes 15.2 5 182
## 13 5200 2665 Yes 15.6 5 184
## 14 4600 1805 Yes 15.5 4 193
## 15 5200 2595 No 16.5 6 198
## 16 4800 1690 No 20.0 7 178
## 17 4000 1790 No 27.0 8 194
## 18 4200 1350 No 23.0 6 214
## 19 5000 1450 Yes 20.0 2 179
## 20 5300 1990 No 18.0 6 203
## 21 5000 2090 No 16.0 6 183
## 22 4800 1785 No 16.0 6 203
## 23 6000 3285 Yes 13.2 5 174
## 24 4800 2595 Yes 14.0 5 172
## 25 4800 2535 Yes 16.0 6 181
## 26 5000 1970 No 20.0 7 175
## 27 4800 2465 No 16.0 6 192
## 28 6000 2120 Yes 19.8 4 180
## 29 6000 2505 Yes 13.2 5 174
## 30 5800 1980 No 18.0 6 202
## 31 5000 3150 Yes 10.0 4 141
## 32 6500 2410 Yes 13.2 5 171
## 33 4200 2805 Yes 15.9 5 177
## 34 4600 2285 Yes 15.4 4 180
## 35 5500 2340 Yes 15.5 4 179
## 36 4800 2080 Yes 21.0 7 176
## 37 4800 1885 No 16.0 5 192
## 38 4200 1415 No 20.0 6 212
## 39 5700 3755 Yes 10.6 4 151
## 40 5400 3250 Yes 12.4 4 164
## 41 5800 2855 Yes 15.9 4 175
## 42 5900 2650 Yes 11.9 4 173
## 43 5600 2610 Yes 17.0 4 185
## 44 5500 2710 Yes 11.9 5 168
## 45 6000 2745 Yes 13.7 5 172
## 46 5550 2540 Yes 11.9 4 166
## 47 6000 2335 Yes 17.2 5 184
## 48 6000 1955 No 22.5 5 200
## 49 5200 2325 Yes 18.5 5 188
## 50 6000 2510 Yes 20.6 4 191
## 51 4400 1835 No 18.4 6 205
## 52 4600 1840 No 20.0 6 219
## 53 5000 2370 Yes 13.2 4 164
## 54 5500 2220 Yes 14.5 5 172
## 55 5600 2505 Yes 15.5 5 184
## 56 5000 2240 No 19.6 7 190
## 57 6500 2325 Yes 20.0 2 169
## 58 5100 2425 Yes 14.5 5 175
## 59 5500 2220 No 18.5 5 187
## 60 5750 2475 Yes 11.1 4 166
## 61 3800 1730 No 18.0 5 199
## 62 6000 2505 Yes 13.2 5 172
## 63 6000 2210 No 19.0 5 190
## 64 6000 2435 Yes 13.2 5 170
## 65 5600 2130 Yes 15.9 5 181
## 66 4800 2065 No 20.0 7 190
## 67 5200 2045 No 18.5 5 188
## 68 6000 2380 No 15.2 5 188
## 69 5200 2565 No 16.5 5 190
## 70 4800 1690 No 20.0 7 194
## 71 4800 1570 No 18.0 6 201
## 72 5000 2360 Yes 15.9 4 173
## 73 5600 3130 Yes 13.2 4 177
## 74 5200 2665 Yes 15.2 5 181
## 75 4600 1805 Yes 15.5 4 196
## 76 5000 1890 Yes 16.5 5 195
## 77 4800 1565 No 18.0 6 177
## 78 6000 2910 Yes 18.0 5 184
## 79 5000 2145 Yes 12.8 5 176
## 80 5600 2875 Yes 9.2 4 146
## 81 5200 3375 Yes 15.9 5 175
## 82 5600 2330 Yes 15.9 5 179
## 83 6000 3360 Yes 10.6 4 161
## 84 5200 3505 Yes 11.9 5 162
## 85 5400 2405 Yes 15.9 4 174
## 86 5400 2340 Yes 18.5 5 188
## 87 5000 2515 Yes 19.8 7 187
## 88 5500 2550 Yes 12.4 4 163
## 89 4500 2915 Yes 21.1 7 187
## 90 5800 2685 Yes 18.5 5 180
## 91 5800 2385 Yes 18.5 4 159
## 92 5400 2215 Yes 15.8 5 190
## 93 6200 2310 Yes 19.3 5 184
## Wheelbase Width Turn.circle Rear.seat.room Luggage.room Weight Origin
## 1 102 68 37 26.5 11 2705 non-USA
## 2 115 71 38 30.0 15 3560 non-USA
## 3 102 67 37 28.0 14 3375 non-USA
## 4 106 70 37 31.0 17 3405 non-USA
## 5 109 69 39 27.0 13 3640 non-USA
## 6 105 69 41 28.0 16 2880 USA
## 7 111 74 42 30.5 17 3470 USA
## 8 116 78 45 30.5 21 4105 USA
## 9 108 73 41 26.5 14 3495 USA
## 10 114 73 43 35.0 18 3620 USA
## 11 111 74 44 31.0 14 3935 USA
## 12 101 66 38 25.0 13 2490 USA
## 13 103 68 39 26.0 14 2785 USA
## 14 101 74 43 25.0 13 3240 USA
## 15 108 71 40 28.5 16 3195 USA
## 16 110 74 44 30.5 NA 3715 USA
## 17 111 78 42 33.5 NA 4025 USA
## 18 116 77 42 29.5 20 3910 USA
## 19 96 74 43 NA NA 3380 USA
## 20 113 74 40 31.0 15 3515 USA
## 21 104 68 41 30.5 14 3085 USA
## 22 110 69 44 36.0 17 3570 USA
## 23 98 66 32 26.5 11 2270 USA
## 24 97 67 38 26.5 13 2670 USA
## 25 104 68 39 30.5 14 2970 USA
## 26 112 72 42 26.5 NA 3705 USA
## 27 105 69 42 30.5 16 3080 USA
## 28 97 72 40 20.0 11 3805 USA
## 29 98 66 36 26.5 11 2295 USA
## 30 113 74 40 30.0 15 3490 USA
## 31 90 63 33 26.0 12 1845 USA
## 32 98 67 36 28.0 12 2530 USA
## 33 100 68 39 27.5 13 2690 USA
## 34 101 68 40 24.0 12 2850 USA
## 35 103 70 38 23.0 18 2710 USA
## 36 119 72 45 30.0 NA 3735 USA
## 37 106 71 40 27.5 18 3325 USA
## 38 114 78 43 30.0 21 3950 USA
## 39 93 63 34 27.5 10 1695 non-USA
## 40 97 67 37 24.5 11 2475 non-USA
## 41 100 70 39 23.5 8 2865 non-USA
## 42 103 67 36 28.0 12 2350 non-USA
## 43 107 67 41 28.0 14 3040 non-USA
## 44 94 63 35 26.0 11 2345 non-USA
## 45 98 66 36 28.0 12 2620 non-USA
## 46 94 64 34 23.5 9 2285 non-USA
## 47 104 69 41 31.0 14 2885 non-USA
## 48 113 72 42 29.0 15 4000 non-USA
## 49 103 70 40 27.5 14 3510 non-USA
## 50 106 71 39 25.0 9 3515 non-USA
## 51 109 73 42 30.0 19 3695 USA
## 52 117 77 45 31.5 22 4055 USA
## 53 97 66 34 27.0 16 2325 non-USA
## 54 98 66 36 26.5 13 2440 non-USA
## 55 103 69 40 29.5 14 2970 non-USA
## 56 110 72 39 27.5 NA 3735 non-USA
## 57 96 69 37 NA NA 2895 non-USA
## 58 105 67 34 26.0 12 2920 non-USA
## 59 110 69 37 27.0 15 3525 non-USA
## 60 95 65 36 19.0 6 2450 USA
## 61 113 73 38 28.0 15 3610 USA
## 62 98 67 36 26.0 11 2295 non-USA
## 63 107 70 43 27.5 14 3730 non-USA
## 64 96 66 33 26.0 12 2545 non-USA
## 65 103 67 40 28.5 14 3050 non-USA
## 66 112 74 41 27.0 NA 4100 non-USA
## 67 104 69 41 28.5 14 3200 non-USA
## 68 103 67 39 28.0 14 2910 USA
## 69 105 70 42 28.0 16 2890 USA
## 70 110 74 44 30.5 NA 3715 USA
## 71 111 74 42 31.5 17 3470 USA
## 72 97 67 39 24.5 8 2640 USA
## 73 99 66 35 25.5 17 2350 USA
## 74 101 66 39 25.0 13 2575 USA
## 75 101 75 43 25.0 13 3240 USA
## 76 108 72 41 28.5 16 3450 USA
## 77 111 74 43 30.5 18 3495 USA
## 78 99 67 37 26.5 14 2775 non-USA
## 79 102 68 40 26.5 12 2495 USA
## 80 90 60 32 23.5 10 2045 non-USA
## 81 97 65 35 27.5 15 2490 non-USA
## 82 102 67 37 27.0 14 3085 non-USA
## 83 93 63 34 27.5 10 1965 non-USA
## 84 94 65 36 24.0 11 2055 non-USA
## 85 99 69 39 23.0 13 2950 non-USA
## 86 103 70 38 28.5 15 3030 non-USA
## 87 113 71 41 35.0 NA 3785 non-USA
## 88 93 63 34 26.0 10 2240 non-USA
## 89 115 72 38 34.0 NA 3960 non-USA
## 90 103 67 35 31.5 14 2985 non-USA
## 91 97 66 36 26.0 15 2810 non-USA
## 92 104 67 37 29.5 14 2985 non-USA
## 93 105 69 38 30.0 15 3245 non-USA
## Make
## 1 Acura Integra
## 2 Acura Legend
## 3 Audi 90
## 4 Audi 100
## 5 BMW 535i
## 6 Buick Century
## 7 Buick LeSabre
## 8 Buick Roadmaster
## 9 Buick Riviera
## 10 Cadillac DeVille
## 11 Cadillac Seville
## 12 Chevrolet Cavalier
## 13 Chevrolet Corsica
## 14 Chevrolet Camaro
## 15 Chevrolet Lumina
## 16 Chevrolet Lumina_APV
## 17 Chevrolet Astro
## 18 Chevrolet Caprice
## 19 Chevrolet Corvette
## 20 Chrylser Concorde
## 21 Chrysler LeBaron
## 22 Chrysler Imperial
## 23 Dodge Colt
## 24 Dodge Shadow
## 25 Dodge Spirit
## 26 Dodge Caravan
## 27 Dodge Dynasty
## 28 Dodge Stealth
## 29 Eagle Summit
## 30 Eagle Vision
## 31 Ford Festiva
## 32 Ford Escort
## 33 Ford Tempo
## 34 Ford Mustang
## 35 Ford Probe
## 36 Ford Aerostar
## 37 Ford Taurus
## 38 Ford Crown_Victoria
## 39 Geo Metro
## 40 Geo Storm
## 41 Honda Prelude
## 42 Honda Civic
## 43 Honda Accord
## 44 Hyundai Excel
## 45 Hyundai Elantra
## 46 Hyundai Scoupe
## 47 Hyundai Sonata
## 48 Infiniti Q45
## 49 Lexus ES300
## 50 Lexus SC300
## 51 Lincoln Continental
## 52 Lincoln Town_Car
## 53 Mazda 323
## 54 Mazda Protege
## 55 Mazda 626
## 56 Mazda MPV
## 57 Mazda RX-7
## 58 Mercedes-Benz 190E
## 59 Mercedes-Benz 300E
## 60 Mercury Capri
## 61 Mercury Cougar
## 62 Mitsubishi Mirage
## 63 Mitsubishi Diamante
## 64 Nissan Sentra
## 65 Nissan Altima
## 66 Nissan Quest
## 67 Nissan Maxima
## 68 Oldsmobile Achieva
## 69 Oldsmobile Cutlass_Ciera
## 70 Oldsmobile Silhouette
## 71 Oldsmobile Eighty-Eight
## 72 Plymouth Laser
## 73 Pontiac LeMans
## 74 Pontiac Sunbird
## 75 Pontiac Firebird
## 76 Pontiac Grand_Prix
## 77 Pontiac Bonneville
## 78 Saab 900
## 79 Saturn SL
## 80 Subaru Justy
## 81 Subaru Loyale
## 82 Subaru Legacy
## 83 Suzuki Swift
## 84 Toyota Tercel
## 85 Toyota Celica
## 86 Toyota Camry
## 87 Toyota Previa
## 88 Volkswagen Fox
## 89 Volkswagen Eurovan
## 90 Volkswagen Passat
## 91 Volkswagen Corrado
## 92 Volvo 240
## 93 Volvo 850
# Response: car price.
y <- data_psd[["Price"]]

# Predictors x1..x15: the numeric Cars93 columns used in the analysis.
x1 <- data_psd[["MPG.city"]]
x2 <- data_psd[["MPG.highway"]]
x3 <- data_psd[["EngineSize"]]
x4 <- data_psd[["Horsepower"]]
x5 <- data_psd[["RPM"]]
x6 <- data_psd[["Rev.per.mile"]]
x7 <- data_psd[["Fuel.tank.capacity"]]
x8 <- data_psd[["Passengers"]]
x9 <- data_psd[["Length"]]
x10 <- data_psd[["Wheelbase"]]
x11 <- data_psd[["Width"]]
x12 <- data_psd[["Turn.circle"]]
x13 <- data_psd[["Rear.seat.room"]]
x14 <- data_psd[["Luggage.room"]]
x15 <- data_psd[["Weight"]]

# Bind the vectors column-wise (columns are named after the variables) and
# convert the resulting matrix to a data frame, then print it.
data <- as.data.frame(
  cbind(y, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15)
)
data
## y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15
## 1 15.9 25 31 1.8 140 6300 2890 13.2 5 177 102 68 37 26.5 11 2705
## 2 33.9 18 25 3.2 200 5500 2335 18.0 5 195 115 71 38 30.0 15 3560
## 3 29.1 20 26 2.8 172 5500 2280 16.9 5 180 102 67 37 28.0 14 3375
## 4 37.7 19 26 2.8 172 5500 2535 21.1 6 193 106 70 37 31.0 17 3405
## 5 30.0 22 30 3.5 208 5700 2545 21.1 4 186 109 69 39 27.0 13 3640
## 6 15.7 22 31 2.2 110 5200 2565 16.4 6 189 105 69 41 28.0 16 2880
## 7 20.8 19 28 3.8 170 4800 1570 18.0 6 200 111 74 42 30.5 17 3470
## 8 23.7 16 25 5.7 180 4000 1320 23.0 6 216 116 78 45 30.5 21 4105
## 9 26.3 19 27 3.8 170 4800 1690 18.8 5 198 108 73 41 26.5 14 3495
## 10 34.7 16 25 4.9 200 4100 1510 18.0 6 206 114 73 43 35.0 18 3620
## 11 40.1 16 25 4.6 295 6000 1985 20.0 5 204 111 74 44 31.0 14 3935
## 12 13.4 25 36 2.2 110 5200 2380 15.2 5 182 101 66 38 25.0 13 2490
## 13 11.4 25 34 2.2 110 5200 2665 15.6 5 184 103 68 39 26.0 14 2785
## 14 15.1 19 28 3.4 160 4600 1805 15.5 4 193 101 74 43 25.0 13 3240
## 15 15.9 21 29 2.2 110 5200 2595 16.5 6 198 108 71 40 28.5 16 3195
## 16 16.3 18 23 3.8 170 4800 1690 20.0 7 178 110 74 44 30.5 NA 3715
## 17 16.6 15 20 4.3 165 4000 1790 27.0 8 194 111 78 42 33.5 NA 4025
## 18 18.8 17 26 5.0 170 4200 1350 23.0 6 214 116 77 42 29.5 20 3910
## 19 38.0 17 25 5.7 300 5000 1450 20.0 2 179 96 74 43 NA NA 3380
## 20 18.4 20 28 3.3 153 5300 1990 18.0 6 203 113 74 40 31.0 15 3515
## 21 15.8 23 28 3.0 141 5000 2090 16.0 6 183 104 68 41 30.5 14 3085
## 22 29.5 20 26 3.3 147 4800 1785 16.0 6 203 110 69 44 36.0 17 3570
## 23 9.2 29 33 1.5 92 6000 3285 13.2 5 174 98 66 32 26.5 11 2270
## 24 11.3 23 29 2.2 93 4800 2595 14.0 5 172 97 67 38 26.5 13 2670
## 25 13.3 22 27 2.5 100 4800 2535 16.0 6 181 104 68 39 30.5 14 2970
## 26 19.0 17 21 3.0 142 5000 1970 20.0 7 175 112 72 42 26.5 NA 3705
## 27 15.6 21 27 2.5 100 4800 2465 16.0 6 192 105 69 42 30.5 16 3080
## 28 25.8 18 24 3.0 300 6000 2120 19.8 4 180 97 72 40 20.0 11 3805
## 29 12.2 29 33 1.5 92 6000 2505 13.2 5 174 98 66 36 26.5 11 2295
## 30 19.3 20 28 3.5 214 5800 1980 18.0 6 202 113 74 40 30.0 15 3490
## 31 7.4 31 33 1.3 63 5000 3150 10.0 4 141 90 63 33 26.0 12 1845
## 32 10.1 23 30 1.8 127 6500 2410 13.2 5 171 98 67 36 28.0 12 2530
## 33 11.3 22 27 2.3 96 4200 2805 15.9 5 177 100 68 39 27.5 13 2690
## 34 15.9 22 29 2.3 105 4600 2285 15.4 4 180 101 68 40 24.0 12 2850
## 35 14.0 24 30 2.0 115 5500 2340 15.5 4 179 103 70 38 23.0 18 2710
## 36 19.9 15 20 3.0 145 4800 2080 21.0 7 176 119 72 45 30.0 NA 3735
## 37 20.2 21 30 3.0 140 4800 1885 16.0 5 192 106 71 40 27.5 18 3325
## 38 20.9 18 26 4.6 190 4200 1415 20.0 6 212 114 78 43 30.0 21 3950
## 39 8.4 46 50 1.0 55 5700 3755 10.6 4 151 93 63 34 27.5 10 1695
## 40 12.5 30 36 1.6 90 5400 3250 12.4 4 164 97 67 37 24.5 11 2475
## 41 19.8 24 31 2.3 160 5800 2855 15.9 4 175 100 70 39 23.5 8 2865
## 42 12.1 42 46 1.5 102 5900 2650 11.9 4 173 103 67 36 28.0 12 2350
## 43 17.5 24 31 2.2 140 5600 2610 17.0 4 185 107 67 41 28.0 14 3040
## 44 8.0 29 33 1.5 81 5500 2710 11.9 5 168 94 63 35 26.0 11 2345
## 45 10.0 22 29 1.8 124 6000 2745 13.7 5 172 98 66 36 28.0 12 2620
## 46 10.0 26 34 1.5 92 5550 2540 11.9 4 166 94 64 34 23.5 9 2285
## 47 13.9 20 27 2.0 128 6000 2335 17.2 5 184 104 69 41 31.0 14 2885
## 48 47.9 17 22 4.5 278 6000 1955 22.5 5 200 113 72 42 29.0 15 4000
## 49 28.0 18 24 3.0 185 5200 2325 18.5 5 188 103 70 40 27.5 14 3510
## 50 35.2 18 23 3.0 225 6000 2510 20.6 4 191 106 71 39 25.0 9 3515
## 51 34.3 17 26 3.8 160 4400 1835 18.4 6 205 109 73 42 30.0 19 3695
## 52 36.1 18 26 4.6 210 4600 1840 20.0 6 219 117 77 45 31.5 22 4055
## 53 8.3 29 37 1.6 82 5000 2370 13.2 4 164 97 66 34 27.0 16 2325
## 54 11.6 28 36 1.8 103 5500 2220 14.5 5 172 98 66 36 26.5 13 2440
## 55 16.5 26 34 2.5 164 5600 2505 15.5 5 184 103 69 40 29.5 14 2970
## 56 19.1 18 24 3.0 155 5000 2240 19.6 7 190 110 72 39 27.5 NA 3735
## 57 32.5 17 25 1.3 255 6500 2325 20.0 2 169 96 69 37 NA NA 2895
## 58 31.9 20 29 2.3 130 5100 2425 14.5 5 175 105 67 34 26.0 12 2920
## 59 61.9 19 25 3.2 217 5500 2220 18.5 5 187 110 69 37 27.0 15 3525
## 60 14.1 23 26 1.6 100 5750 2475 11.1 4 166 95 65 36 19.0 6 2450
## 61 14.9 19 26 3.8 140 3800 1730 18.0 5 199 113 73 38 28.0 15 3610
## 62 10.3 29 33 1.5 92 6000 2505 13.2 5 172 98 67 36 26.0 11 2295
## 63 26.1 18 24 3.0 202 6000 2210 19.0 5 190 107 70 43 27.5 14 3730
## 64 11.8 29 33 1.6 110 6000 2435 13.2 5 170 96 66 33 26.0 12 2545
## 65 15.7 24 30 2.4 150 5600 2130 15.9 5 181 103 67 40 28.5 14 3050
## 66 19.1 17 23 3.0 151 4800 2065 20.0 7 190 112 74 41 27.0 NA 4100
## 67 21.5 21 26 3.0 160 5200 2045 18.5 5 188 104 69 41 28.5 14 3200
## 68 13.5 24 31 2.3 155 6000 2380 15.2 5 188 103 67 39 28.0 14 2910
## 69 16.3 23 31 2.2 110 5200 2565 16.5 5 190 105 70 42 28.0 16 2890
## 70 19.5 18 23 3.8 170 4800 1690 20.0 7 194 110 74 44 30.5 NA 3715
## 71 20.7 19 28 3.8 170 4800 1570 18.0 6 201 111 74 42 31.5 17 3470
## 72 14.4 23 30 1.8 92 5000 2360 15.9 4 173 97 67 39 24.5 8 2640
## 73 9.0 31 41 1.6 74 5600 3130 13.2 4 177 99 66 35 25.5 17 2350
## 74 11.1 23 31 2.0 110 5200 2665 15.2 5 181 101 66 39 25.0 13 2575
## 75 17.7 19 28 3.4 160 4600 1805 15.5 4 196 101 75 43 25.0 13 3240
## 76 18.5 19 27 3.4 200 5000 1890 16.5 5 195 108 72 41 28.5 16 3450
## 77 24.4 19 28 3.8 170 4800 1565 18.0 6 177 111 74 43 30.5 18 3495
## 78 28.7 20 26 2.1 140 6000 2910 18.0 5 184 99 67 37 26.5 14 2775
## 79 11.1 28 38 1.9 85 5000 2145 12.8 5 176 102 68 40 26.5 12 2495
## 80 8.4 33 37 1.2 73 5600 2875 9.2 4 146 90 60 32 23.5 10 2045
## 81 10.9 25 30 1.8 90 5200 3375 15.9 5 175 97 65 35 27.5 15 2490
## 82 19.5 23 30 2.2 130 5600 2330 15.9 5 179 102 67 37 27.0 14 3085
## 83 8.6 39 43 1.3 70 6000 3360 10.6 4 161 93 63 34 27.5 10 1965
## 84 9.8 32 37 1.5 82 5200 3505 11.9 5 162 94 65 36 24.0 11 2055
## 85 18.4 25 32 2.2 135 5400 2405 15.9 4 174 99 69 39 23.0 13 2950
## 86 18.2 22 29 2.2 130 5400 2340 18.5 5 188 103 70 38 28.5 15 3030
## 87 22.7 18 22 2.4 138 5000 2515 19.8 7 187 113 71 41 35.0 NA 3785
## 88 9.1 25 33 1.8 81 5500 2550 12.4 4 163 93 63 34 26.0 10 2240
## 89 19.7 17 21 2.5 109 4500 2915 21.1 7 187 115 72 38 34.0 NA 3960
## 90 20.0 21 30 2.0 134 5800 2685 18.5 5 180 103 67 35 31.5 14 2985
## 91 23.3 18 25 2.8 178 5800 2385 18.5 4 159 97 66 36 26.0 15 2810
## 92 22.7 21 28 2.3 114 5400 2215 15.8 5 190 104 67 37 29.5 14 2985
## 93 26.7 20 28 2.4 168 6200 2310 19.3 5 184 105 69 38 30.0 15 3245
# Check for missing values: show every row that has an NA in any column.
# Looking only at x14 would miss rows whose sole NA is in another column
# (x13 also contains NAs), whereas na.omit() below drops all such rows.
data[!complete.cases(data), ]
## y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15
## 16 16.3 18 23 3.8 170 4800 1690 20.0 7 178 110 74 44 30.5 NA 3715
## 17 16.6 15 20 4.3 165 4000 1790 27.0 8 194 111 78 42 33.5 NA 4025
## 19 38.0 17 25 5.7 300 5000 1450 20.0 2 179 96 74 43 NA NA 3380
## 26 19.0 17 21 3.0 142 5000 1970 20.0 7 175 112 72 42 26.5 NA 3705
## 36 19.9 15 20 3.0 145 4800 2080 21.0 7 176 119 72 45 30.0 NA 3735
## 56 19.1 18 24 3.0 155 5000 2240 19.6 7 190 110 72 39 27.5 NA 3735
## 57 32.5 17 25 1.3 255 6500 2325 20.0 2 169 96 69 37 NA NA 2895
## 66 19.1 17 23 3.0 151 4800 2065 20.0 7 190 112 74 41 27.0 NA 4100
## 70 19.5 18 23 3.8 170 4800 1690 20.0 7 194 110 74 44 30.5 NA 3715
## 87 22.7 18 22 2.4 138 5000 2515 19.8 7 187 113 71 41 35.0 NA 3785
## 89 19.7 17 21 2.5 109 4500 2915 21.1 7 187 115 72 38 34.0 NA 3960
# Drop every row that contains at least one NA value
data <- na.omit(data)
Melihat data 6 teratas
# Preview the first six rows of the data
head(data)
## y x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15
## 1 15.9 25 31 1.8 140 6300 2890 13.2 5 177 102 68 37 26.5 11 2705
## 2 33.9 18 25 3.2 200 5500 2335 18.0 5 195 115 71 38 30.0 15 3560
## 3 29.1 20 26 2.8 172 5500 2280 16.9 5 180 102 67 37 28.0 14 3375
## 4 37.7 19 26 2.8 172 5500 2535 21.1 6 193 106 70 37 31.0 17 3405
## 5 30.0 22 30 3.5 208 5700 2545 21.1 4 186 109 69 39 27.0 13 3640
## 6 15.7 22 31 2.2 110 5200 2565 16.4 6 189 105 69 41 28.0 16 2880
# Distribution of the response variable y (labelled "Indeks Alibaca" in the
# original note; NOTE(review): the prose elsewhere calls y "Price" -- confirm
# which label is correct)
hist(data$y, col = "blue")
# Fit the full multiple linear regression of y on all remaining columns
reglin <- lm(y ~ ., data = data)
# Estimated coefficients of the full model
coefficients(reglin)
## (Intercept) x1 x2 x3 x4 x5
## 37.828539238 0.070542200 -0.310033849 0.170016187 0.134009410 -0.002409839
## x6 x7 x8 x9 x10 x11
## 0.003238040 0.142415262 -0.774612972 -0.001480220 0.718253745 -1.342349962
## x12 x13 x14 x15
## -0.501002215 0.018016142 0.118545861 0.004134950
Pada metode regresi linier berganda, model yang didapatkan yaitu \[y = 37.828539238 + 0.070542200x_{1} - 0.310033849x_{2} + 0.170016187x_{3} + 0.134009410x_{4} - 0.002409839x_{5} + 0.003238040x_{6} + 0.142415262x_{7} - 0.774612972x_{8} - 0.001480220x_{9} + 0.718253745x_{10} - 1.342349962x_{11} - 0.501002215x_{12} + 0.018016142x_{13} + 0.118545861x_{14} + 0.004134950x_{15}\]
# Multikolinieritas
# Variance inflation factor of each predictor in the full model
car::vif(reglin)
## x1 x2 x3 x4 x5 x6 x7 x8
## 16.609347 13.392348 18.266085 14.974580 5.118684 4.348188 7.086879 3.258554
## x9 x10 x11 x12 x13 x14 x15
## 10.833645 10.382059 8.900447 3.856814 3.449777 3.486519 30.232872
Dapat dilihat beberapa peubah memiliki nilai VIF > 10, yaitu peubah x1, x2, x3, x4, x9, x10, dan x15, sehingga terdapat multikolinieritas.
# Kolmogorov-Smirnov test of the residuals against a normal distribution whose
# mean and sd are taken from the residuals themselves.
# NOTE(review): ks.test() expects pre-specified parameters; with estimated ones
# the p-value is only approximate -- consider a Lilliefors or Shapiro-Wilk test.
ks.test(reglin$residuals, "pnorm", mean=mean(reglin$residuals), sd=sd(reglin$residuals))
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: reglin$residuals
## D = 0.087173, p-value = 0.5327
## alternative hypothesis: two-sided
Berdasarkan uji Kolmogorov-Smirnov, p-value = 0.5327 > 0.05 sehingga sisaan menyebar normal.
## Uji Asumsi Homoskedastisitas (Gauss Markov)
# Breusch-Pagan test for heteroskedasticity of the full model's residuals
lmtest::bptest(reglin)
##
## studentized Breusch-Pagan test
##
## data: reglin
## BP = 23.494, df = 15, p-value = 0.0742
Karena p-value > 0.05 maka ragam sisaan homogen atau tidak terdapat masalah heteroskedastisitas
# Runs test on the residuals (alternative hypothesis: non-randomness)
library(randtests)
runs.test(reglin$residuals)
##
## Runs Test
##
## data: reglin$residuals
## statistic = -0.44448, runs = 40, n1 = 41, n2 = 41, n = 82, p-value =
## 0.6567
## alternative hypothesis: nonrandomness
Karena p-value > 0.05 maka sisaan saling bebas.
## Nilai harapan sisaan sama dengan nol (Gauss Markov)
# One-sample t-test of H0: the mean of the residuals equals zero.
# NOTE(review): residuals of an OLS fit with an intercept average to zero by
# construction, so this test cannot reject (the output shows t of about -1.7e-16).
t.test(reglin$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: reglin$residuals
## t = -1.6908e-16, df = 81, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -1.115204 1.115204
## sample estimates:
## mean of x
## -9.476985e-17
Karena p-value > 0.05 maka nilai harapan sisaan sama dengan nol
# Backward elimination by AIC, starting from the full model. The scope reuses
# the full model's own formula instead of refitting an identical 15-term lm()
# only to read its formula back.
bmodelselect <- step(reglin, direction = "backward", scope = formula(reglin),
                     trace = 1)
## Start: AIC=297.4
## y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 +
## x12 + x13 + x14 + x15
##
## Df Sum of Sq RSS AIC
## - x9 1 0.004 2086.6 295.40
## - x13 1 0.062 2086.7 295.40
## - x3 1 0.129 2086.7 295.40
## - x1 1 0.759 2087.3 295.43
## - x7 1 2.101 2088.7 295.48
## - x14 1 2.934 2089.5 295.51
## - x8 1 7.494 2094.1 295.69
## - x2 1 14.598 2101.2 295.97
## - x15 1 14.672 2101.3 295.97
## - x5 1 31.314 2117.9 296.62
## - x6 1 48.300 2134.9 297.27
## <none> 2086.6 297.40
## - x12 1 52.799 2139.4 297.45
## - x10 1 168.325 2254.9 301.76
## - x11 1 223.753 2310.3 303.75
## - x4 1 253.208 2339.8 304.79
##
## Step: AIC=295.4
## y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x10 + x11 + x12 +
## x13 + x14 + x15
##
## Df Sum of Sq RSS AIC
## - x13 1 0.062 2086.7 293.40
## - x3 1 0.127 2086.7 293.40
## - x1 1 0.807 2087.4 293.43
## - x7 1 2.099 2088.7 293.48
## - x14 1 2.946 2089.6 293.51
## - x8 1 7.793 2094.4 293.70
## - x2 1 15.043 2101.6 293.99
## - x15 1 15.636 2102.2 294.01
## - x5 1 32.373 2119.0 294.66
## - x6 1 48.648 2135.2 295.29
## <none> 2086.6 295.40
## - x12 1 55.945 2142.5 295.57
## - x10 1 185.176 2271.8 300.37
## - x11 1 245.642 2332.2 302.52
## - x4 1 271.702 2358.3 303.44
##
## Step: AIC=293.4
## y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x10 + x11 + x12 +
## x14 + x15
##
## Df Sum of Sq RSS AIC
## - x3 1 0.173 2086.8 291.41
## - x1 1 0.853 2087.5 291.44
## - x7 1 2.093 2088.8 291.48
## - x14 1 3.384 2090.0 291.53
## - x8 1 9.082 2095.7 291.76
## - x2 1 15.076 2101.7 291.99
## - x15 1 15.605 2102.3 292.01
## - x5 1 32.838 2119.5 292.68
## - x6 1 48.713 2135.4 293.29
## <none> 2086.7 293.40
## - x12 1 57.152 2143.8 293.62
## - x10 1 205.775 2292.4 299.11
## - x4 1 272.641 2359.3 301.47
## - x11 1 289.439 2376.1 302.05
##
## Step: AIC=291.41
## y ~ x1 + x2 + x4 + x5 + x6 + x7 + x8 + x10 + x11 + x12 + x14 +
## x15
##
## Df Sum of Sq RSS AIC
## - x1 1 1.00 2087.8 289.45
## - x7 1 2.44 2089.3 289.50
## - x14 1 3.52 2090.3 289.55
## - x8 1 8.91 2095.7 289.76
## - x2 1 15.37 2102.2 290.01
## - x15 1 15.45 2102.3 290.01
## <none> 2086.8 291.41
## - x6 1 52.43 2139.3 291.44
## - x12 1 57.20 2144.0 291.63
## - x5 1 63.05 2149.9 291.85
## - x10 1 218.67 2305.5 297.58
## - x11 1 294.32 2381.2 300.23
## - x4 1 434.73 2521.6 304.93
##
## Step: AIC=289.45
## y ~ x2 + x4 + x5 + x6 + x7 + x8 + x10 + x11 + x12 + x14 + x15
##
## Df Sum of Sq RSS AIC
## - x7 1 1.94 2089.8 287.52
## - x14 1 3.14 2091.0 287.57
## - x8 1 8.49 2096.3 287.78
## - x15 1 15.89 2103.7 288.07
## - x2 1 33.39 2121.2 288.75
## <none> 2087.8 289.45
## - x12 1 59.03 2146.9 289.73
## - x6 1 60.87 2148.7 289.80
## - x5 1 62.85 2150.7 289.88
## - x10 1 217.82 2305.7 295.58
## - x11 1 296.55 2384.4 298.34
## - x4 1 437.06 2524.9 303.03
##
## Step: AIC=287.52
## y ~ x2 + x4 + x5 + x6 + x8 + x10 + x11 + x12 + x14 + x15
##
## Df Sum of Sq RSS AIC
## - x14 1 5.03 2094.8 285.72
## - x8 1 9.72 2099.5 285.90
## - x15 1 22.26 2112.0 286.39
## - x2 1 40.43 2130.2 287.10
## <none> 2089.8 287.52
## - x12 1 60.09 2149.9 287.85
## - x5 1 61.11 2150.9 287.89
## - x6 1 71.79 2161.6 288.29
## - x10 1 222.05 2311.8 293.80
## - x11 1 294.68 2384.4 296.34
## - x4 1 439.02 2528.8 301.16
##
## Step: AIC=285.72
## y ~ x2 + x4 + x5 + x6 + x8 + x10 + x11 + x12 + x15
##
## Df Sum of Sq RSS AIC
## - x8 1 6.42 2101.2 283.97
## - x15 1 27.38 2122.2 284.79
## - x2 1 36.33 2131.1 285.13
## <none> 2094.8 285.72
## - x12 1 63.07 2157.9 286.15
## - x5 1 69.18 2164.0 286.38
## - x6 1 71.70 2166.5 286.48
## - x10 1 235.41 2330.2 292.45
## - x11 1 290.48 2385.3 294.37
## - x4 1 434.04 2528.8 299.16
##
## Step: AIC=283.97
## y ~ x2 + x4 + x5 + x6 + x10 + x11 + x12 + x15
##
## Df Sum of Sq RSS AIC
## - x15 1 27.59 2128.8 283.04
## - x2 1 31.07 2132.3 283.18
## <none> 2101.2 283.97
## - x12 1 66.02 2167.2 284.51
## - x5 1 72.81 2174.0 284.76
## - x6 1 73.15 2174.4 284.78
## - x10 1 248.75 2350.0 291.15
## - x11 1 289.74 2391.0 292.56
## - x4 1 501.26 2602.5 299.51
##
## Step: AIC=283.04
## y ~ x2 + x4 + x5 + x6 + x10 + x11 + x12
##
## Df Sum of Sq RSS AIC
## <none> 2128.8 283.04
## - x12 1 54.57 2183.4 283.12
## - x6 1 64.46 2193.3 283.49
## - x2 1 94.82 2223.6 284.61
## - x5 1 111.73 2240.5 285.24
## - x11 1 271.26 2400.1 290.88
## - x10 1 602.00 2730.8 301.46
## - x4 1 1323.14 3451.9 320.68
# Coefficient table and fit statistics of the model kept by backward elimination
summary(bmodelselect)
##
## Call:
## lm(formula = y ~ x2 + x4 + x5 + x6 + x10 + x11 + x12, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.527 -3.270 -0.156 2.567 23.319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.775472 28.530918 1.219 0.22676
## x2 -0.323784 0.178341 -1.816 0.07349 .
## x4 0.162091 0.023901 6.782 2.50e-09 ***
## x5 -0.003113 0.001580 -1.971 0.05250 .
## x6 0.003247 0.002169 1.497 0.13867
## x10 0.835708 0.182688 4.575 1.88e-05 ***
## x11 -1.270318 0.413689 -3.071 0.00299 **
## x12 -0.476633 0.346073 -1.377 0.17258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.364 on 74 degrees of freedom
## Multiple R-squared: 0.735, Adjusted R-squared: 0.71
## F-statistic: 29.33 on 7 and 74 DF, p-value: < 2.2e-16
Berdasarkan metode backward, model terbaik adalah model dengan peubah x2, x4, x5, x6, x10, x11, x12 dengan R-squared: 0.735
# Forward selection by AIC: begin with the intercept-only model and add one
# term at a time, up to the predictors in the full model's formula.
fmodelselect <- step(
  lm(y ~ 1, data = data),
  scope = formula(reglin),
  direction = "forward",
  trace = 1
)
## Start: AIC=377.95
## y ~ 1
##
## Df Sum of Sq RSS AIC
## + x4 1 4979.3 3054.9 300.66
## + x15 1 4369.7 3664.5 315.58
## + x7 1 4019.1 4015.1 323.07
## + x3 1 3309.4 4724.8 336.42
## + x10 1 3172.3 4862.0 338.76
## + x2 1 3147.7 4886.6 339.18
## + x1 1 3104.0 4930.2 339.91
## + x9 1 2448.8 5585.4 350.14
## + x11 1 1969.2 6065.0 356.89
## + x6 1 1534.4 6499.8 362.57
## + x12 1 1450.2 6584.0 363.63
## + x14 1 1079.6 6954.7 368.12
## + x13 1 940.1 7094.1 369.75
## + x8 1 601.5 7432.7 373.57
## <none> 8034.2 377.95
## + x5 1 9.8 8024.4 379.85
##
## Step: AIC=300.66
## y ~ x4
##
## Df Sum of Sq RSS AIC
## + x10 1 162.358 2892.5 298.18
## + x7 1 143.457 2911.4 298.71
## + x13 1 129.978 2924.9 299.09
## + x15 1 113.716 2941.2 299.55
## + x2 1 109.520 2945.4 299.66
## <none> 3054.9 300.66
## + x1 1 71.764 2983.1 300.71
## + x14 1 64.739 2990.2 300.90
## + x8 1 59.904 2995.0 301.03
## + x11 1 33.758 3021.1 301.75
## + x9 1 28.638 3026.3 301.89
## + x12 1 26.582 3028.3 301.94
## + x5 1 23.582 3031.3 302.02
## + x3 1 22.983 3031.9 302.04
## + x6 1 10.369 3044.5 302.38
##
## Step: AIC=298.18
## y ~ x4 + x10
##
## Df Sum of Sq RSS AIC
## + x11 1 476.86 2415.7 285.41
## + x12 1 239.12 2653.4 293.11
## + x6 1 156.72 2735.8 295.61
## + x9 1 103.05 2789.5 297.20
## <none> 2892.5 298.18
## + x2 1 63.39 2829.2 298.36
## + x3 1 43.49 2849.1 298.94
## + x7 1 30.94 2861.6 299.30
## + x13 1 15.79 2876.8 299.73
## + x1 1 15.00 2877.5 299.75
## + x5 1 7.79 2884.8 299.96
## + x14 1 2.01 2890.5 300.12
## + x8 1 1.30 2891.2 300.14
## + x15 1 0.41 2892.1 300.17
##
## Step: AIC=285.41
## y ~ x4 + x10 + x11
##
## Df Sum of Sq RSS AIC
## + x15 1 104.686 2311.0 283.77
## + x2 1 102.143 2313.5 283.87
## + x5 1 93.859 2321.8 284.16
## + x7 1 80.679 2335.0 284.62
## + x1 1 71.573 2344.1 284.94
## <none> 2415.7 285.41
## + x3 1 33.363 2382.3 286.27
## + x12 1 27.074 2388.6 286.48
## + x14 1 15.010 2400.7 286.90
## + x6 1 11.813 2403.9 287.01
## + x13 1 3.979 2411.7 287.27
## + x9 1 0.621 2415.1 287.39
## + x8 1 0.525 2415.2 287.39
##
## Step: AIC=283.77
## y ~ x4 + x10 + x11 + x15
##
## Df Sum of Sq RSS AIC
## + x12 1 66.609 2244.4 283.38
## <none> 2311.0 283.77
## + x6 1 50.939 2260.1 283.95
## + x5 1 33.304 2277.7 284.58
## + x2 1 25.663 2285.3 284.86
## + x7 1 23.781 2287.2 284.93
## + x1 1 9.979 2301.0 285.42
## + x9 1 9.570 2301.4 285.43
## + x3 1 6.266 2304.7 285.55
## + x13 1 5.778 2305.2 285.57
## + x8 1 5.647 2305.3 285.57
## + x14 1 3.271 2307.7 285.66
##
## Step: AIC=283.38
## y ~ x4 + x10 + x11 + x15 + x12
##
## Df Sum of Sq RSS AIC
## <none> 2244.4 283.38
## + x5 1 47.463 2196.9 283.62
## + x6 1 39.642 2204.8 283.92
## + x2 1 23.808 2220.6 284.50
## + x7 1 16.458 2227.9 284.77
## + x1 1 11.395 2233.0 284.96
## + x3 1 10.288 2234.1 285.00
## + x8 1 3.817 2240.6 285.24
## + x14 1 2.516 2241.9 285.28
## + x9 1 1.793 2242.6 285.31
## + x13 1 1.348 2243.0 285.33
# Coefficient table and fit statistics of the model chosen by forward selection
summary(fmodelselect)
##
## Call:
## lm(formula = y ~ x4 + x10 + x11 + x15 + x12, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2912 -2.9626 -0.4857 2.5035 25.0623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.21903 20.13032 1.601 0.113632
## x4 0.10837 0.02551 4.248 6.04e-05 ***
## x10 0.56663 0.22235 2.548 0.012839 *
## x11 -1.34628 0.39044 -3.448 0.000924 ***
## x15 0.00884 0.00400 2.210 0.030123 *
## x12 -0.52682 0.35079 -1.502 0.137281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.434 on 76 degrees of freedom
## Multiple R-squared: 0.7206, Adjusted R-squared: 0.7023
## F-statistic: 39.21 on 5 and 76 DF, p-value: < 2.2e-16
Berdasarkan metode forward, model terbaik adalah model dengan peubah x4,x10,x11,x15,x12 dengan R−squared:0.7206.
# Stepwise selection by AIC: begin with the intercept-only model; at each step
# a term may be added or removed, bounded above by the full model's formula.
smodelselect <- step(
  lm(y ~ 1, data = data),
  scope = formula(reglin),
  direction = "both",
  trace = 1
)
## Start: AIC=377.95
## y ~ 1
##
## Df Sum of Sq RSS AIC
## + x4 1 4979.3 3054.9 300.66
## + x15 1 4369.7 3664.5 315.58
## + x7 1 4019.1 4015.1 323.07
## + x3 1 3309.4 4724.8 336.42
## + x10 1 3172.3 4862.0 338.76
## + x2 1 3147.7 4886.6 339.18
## + x1 1 3104.0 4930.2 339.91
## + x9 1 2448.8 5585.4 350.14
## + x11 1 1969.2 6065.0 356.89
## + x6 1 1534.4 6499.8 362.57
## + x12 1 1450.2 6584.0 363.63
## + x14 1 1079.6 6954.7 368.12
## + x13 1 940.1 7094.1 369.75
## + x8 1 601.5 7432.7 373.57
## <none> 8034.2 377.95
## + x5 1 9.8 8024.4 379.85
##
## Step: AIC=300.66
## y ~ x4
##
## Df Sum of Sq RSS AIC
## + x10 1 162.4 2892.5 298.18
## + x7 1 143.5 2911.4 298.71
## + x13 1 130.0 2924.9 299.09
## + x15 1 113.7 2941.2 299.55
## + x2 1 109.5 2945.4 299.66
## <none> 3054.9 300.66
## + x1 1 71.8 2983.1 300.71
## + x14 1 64.7 2990.2 300.90
## + x8 1 59.9 2995.0 301.03
## + x11 1 33.8 3021.1 301.75
## + x9 1 28.6 3026.3 301.89
## + x12 1 26.6 3028.3 301.94
## + x5 1 23.6 3031.3 302.02
## + x3 1 23.0 3031.9 302.04
## + x6 1 10.4 3044.5 302.38
## - x4 1 4979.3 8034.2 377.95
##
## Step: AIC=298.18
## y ~ x4 + x10
##
## Df Sum of Sq RSS AIC
## + x11 1 476.86 2415.7 285.41
## + x12 1 239.12 2653.4 293.10
## + x6 1 156.72 2735.8 295.61
## + x9 1 103.05 2789.5 297.21
## <none> 2892.5 298.18
## + x2 1 63.39 2829.2 298.36
## + x3 1 43.49 2849.1 298.94
## + x7 1 30.94 2861.6 299.30
## + x13 1 15.79 2876.7 299.73
## + x1 1 15.00 2877.5 299.75
## + x5 1 7.79 2884.7 299.96
## + x14 1 2.01 2890.5 300.12
## + x8 1 1.30 2891.2 300.14
## + x15 1 0.41 2892.1 300.17
## - x10 1 162.36 3054.9 300.66
## - x4 1 1969.44 4862.0 338.76
##
## Step: AIC=285.41
## y ~ x4 + x10 + x11
##
## Df Sum of Sq RSS AIC
## + x15 1 104.69 2311.0 283.77
## + x2 1 102.14 2313.5 283.86
## + x5 1 93.86 2321.8 284.16
## + x7 1 80.68 2335.0 284.62
## + x1 1 71.57 2344.1 284.94
## <none> 2415.7 285.41
## + x3 1 33.36 2382.3 286.27
## + x12 1 27.07 2388.6 286.48
## + x14 1 15.01 2400.7 286.90
## + x6 1 11.81 2403.9 287.01
## + x13 1 3.98 2411.7 287.27
## + x9 1 0.62 2415.1 287.39
## + x8 1 0.53 2415.2 287.39
## - x11 1 476.86 2892.5 298.18
## - x10 1 605.46 3021.1 301.75
## - x4 1 2402.21 4817.9 340.02
##
## Step: AIC=283.77
## y ~ x4 + x10 + x11 + x15
##
## Df Sum of Sq RSS AIC
## + x12 1 66.61 2244.4 283.38
## <none> 2311.0 283.77
## + x6 1 50.94 2260.1 283.95
## + x5 1 33.30 2277.7 284.58
## + x2 1 25.66 2285.3 284.86
## + x7 1 23.78 2287.2 284.93
## - x15 1 104.69 2415.7 285.41
## + x1 1 9.98 2301.0 285.42
## + x9 1 9.57 2301.4 285.43
## + x3 1 6.27 2304.7 285.55
## + x13 1 5.78 2305.2 285.57
## + x8 1 5.65 2305.3 285.57
## + x14 1 3.27 2307.7 285.66
## - x10 1 199.01 2510.0 288.55
## - x11 1 581.14 2892.1 300.17
## - x4 1 614.83 2925.8 301.12
##
## Step: AIC=283.38
## y ~ x4 + x10 + x11 + x15 + x12
##
## Df Sum of Sq RSS AIC
## <none> 2244.4 283.38
## + x5 1 47.46 2196.9 283.62
## - x12 1 66.61 2311.0 283.77
## + x6 1 39.64 2204.8 283.92
## + x2 1 23.81 2220.6 284.50
## + x7 1 16.46 2227.9 284.77
## + x1 1 11.39 2233.0 284.96
## + x3 1 10.29 2234.1 285.00
## + x8 1 3.82 2240.6 285.24
## + x14 1 2.52 2241.9 285.28
## + x9 1 1.79 2242.6 285.31
## + x13 1 1.35 2243.0 285.33
## - x15 1 144.22 2388.6 286.48
## - x10 1 191.79 2436.2 288.10
## - x11 1 351.11 2595.5 293.30
## - x4 1 533.03 2777.4 298.85
# Coefficient table and fit statistics of the model chosen by stepwise selection
summary(smodelselect)
##
## Call:
## lm(formula = y ~ x4 + x10 + x11 + x15 + x12, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2912 -2.9626 -0.4857 2.5035 25.0623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.21903 20.13032 1.601 0.113632
## x4 0.10837 0.02551 4.248 6.04e-05 ***
## x10 0.56663 0.22235 2.548 0.012839 *
## x11 -1.34628 0.39044 -3.448 0.000924 ***
## x15 0.00884 0.00400 2.210 0.030123 *
## x12 -0.52682 0.35079 -1.502 0.137281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.434 on 76 degrees of freedom
## Multiple R-squared: 0.7206, Adjusted R-squared: 0.7023
## F-statistic: 39.21 on 5 and 76 DF, p-value: < 2.2e-16
Berdasarkan metode stepwise, model terbaik adalah model dengan peubah x4,x10,x11,x15,x12 dengan R−squared : 0.7206. Hal ini sama dengan metode forward
Dari ketiga metode yang digunakan, metode backward menghasilkan nilai R-Square yang paling tinggi yaitu 0.735 dan nilai AIC paling kecil yaitu 283.04. Peubah dalam model tersebut adalah x2, x4, x5, x6, x10, x11, x12, dengan x4, x10, dan x11 signifikan pada taraf 5%.
Regresi Ridge adalah metode yang dapat digunakan untuk mencocokkan model regresi ketika data mengandung multikolinieritas. Regresi Ridge meminimumkan Jumlah Kuadrat Residual (JKR) ditambah penalti terhadap jumlah kuadrat koefisien, sehingga cenderung menyusutkan estimasi koefisien menuju nol.
Pada regresi kali ini peubah y yang digunakan adalah Price, sedangkan peubah yang lain sebagai peubah x
# Predictor matrix: every column of `data` except the first one (the response y)
matrix_X <- data.matrix(data[, -1])
# Response as a one-column matrix
matrix_Y <- matrix(data$y)
# glmnet mixing parameter; 0 is the value used here for ridge regression
alpha_ridge <- 0
# Cross-validated fit over glmnet's lambda sequence.
# NOTE(review): no seed is set before this call, so the CV folds (and therefore
# the selected lambda) may differ between runs -- confirm whether set.seed() is wanted.
model_Ridge <- glmnet::cv.glmnet(matrix_X, matrix_Y, alpha = alpha_ridge)
# List the components stored in the cv.glmnet object
summary(model_Ridge)
## Length Class Mode
## lambda 100 -none- numeric
## cvm 100 -none- numeric
## cvsd 100 -none- numeric
## cvup 100 -none- numeric
## cvlo 100 -none- numeric
## nzero 100 -none- numeric
## call 4 -none- call
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## index 2 -none- numeric
# Ridge regression result: cross-validation table (lambda.min and lambda.1se rows)
print(model_Ridge)
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_ridge)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 1.03 97 35.84 12.53 15
## 1se 22.19 64 47.83 17.02 15
# Pick the best lambda: the one stored as lambda.min by cv.glmnet
best_lambda <- model_Ridge$lambda.min
cat("Lambda terbaik:", best_lambda, "\n")
## Lambda terbaik: 1.030127
Model regresi Ridge menggunakan nilai alpha sebesar 0 dan nilai lambda sebesar 1,030127.
# Predict on the training predictors with the ridge model at the chosen lambda
predictions <- predict(model_Ridge, s = best_lambda, newx = matrix_X)
# Ridge coefficients at the chosen lambda
coefficients(model_Ridge, s = best_lambda)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 9.6786999537
## x1 -0.0630535243
## x2 -0.2444103804
## x3 1.2931682127
## x4 0.0803119908
## x5 0.0002368801
## x6 0.0025017714
## x7 0.3432842985
## x8 -0.9018156668
## x9 -0.0108635483
## x10 0.3846892783
## x11 -0.6332718739
## x12 -0.4284063520
## x13 0.1944085285
## x14 0.0272863452
## x15 0.0040684886
Output di atas merupakan nilai koefisien dari setiap peubah bebas terhadap peubah respons (Price). Dapat terlihat bahwa Peubah x3 merupakan peubah dengan koefisien positif tertinggi yang berpengaruh terhadap model. Sehingga dapat disimpulkan bahwa peubah ini merupakan peubah yang paling berpengaruh terhadap peubah respons jika dibandingkan peubah bebas lainnya.
Metode regresi Ridge mempertahankan semua peubah penjelas sehingga persamaan regresinya yaitu \[y = 9.6786999537 - 0.0630535243x_{1} - 0.2444103804x_{2} + 1.2931682127x_{3} + 0.0803119908x_{4} + 0.0002368801x_{5} + 0.0025017714x_{6} + 0.3432842985x_{7} - 0.9018156668x_{8} - 0.0108635483x_{9} + 0.3846892783x_{10} - 0.6332718739x_{11} - 0.4284063520x_{12} + 0.1944085285x_{13} + 0.0272863452x_{14} + 0.0040684886x_{15}\]
# R-squared of the ridge model on the same data it was fitted to:
# 1 - (sum of squared prediction errors) / (total sum of squares of y)
r_squared_ridge <- 1 - sum((matrix_Y - predictions)^2) / sum((matrix_Y - mean(matrix_Y))^2)
r_squared_ridge
## [1] 0.7180968
Pada regresi Ridge, tidak terdapat peubah yang dihilangkan atau semua peubah dimasukkan dalam model. R-Square yang diperoleh dengan regresi ridge adalah R-squared: 0.7180968
# glmnet mixing parameter; 1 is the value used here for lasso regression
alpha_Lasso <- 1
# Cross-validated fit over glmnet's lambda sequence.
# NOTE(review): no seed is set before this call, so the CV folds (and therefore
# the selected lambda) may differ between runs -- confirm whether set.seed() is wanted.
model_Lasso <- glmnet::cv.glmnet(matrix_X, matrix_Y, alpha = alpha_Lasso)
# Auto-print the cross-validation table
model_Lasso
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_Lasso)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.1079 47 37.61 10.38 11
## 1se 2.5517 13 46.78 15.27 3
# Explicit print of the lasso cross-validation table
print(model_Lasso)
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_Lasso)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.1079 47 37.61 10.38 11
## 1se 2.5517 13 46.78 15.27 3
# Pick the best lambda for the lasso: the one stored as lambda.min by cv.glmnet
best_lambda1 <- model_Lasso$lambda.min
cat("Lambda terbaik:", best_lambda1, "\n")
## Lambda terbaik: 0.1079177
# Predict on the training predictors with the lasso model at the chosen lambda
predictions_lasso <- predict(model_Lasso, s = best_lambda1, newx = matrix_X)
# Lasso coefficients at the chosen lambda ("." marks a coefficient set to zero)
coefficients(model_Lasso, s = best_lambda1)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 25.534175439
## x1 .
## x2 -0.207182814
## x3 .
## x4 0.127242013
## x5 -0.001565343
## x6 0.002288748
## x7 0.209215008
## x8 -0.126593161
## x9 .
## x10 0.603912405
## x11 -1.082229618
## x12 -0.414800762
## x13 .
## x14 0.014194373
## x15 0.003439394
Output di atas merupakan nilai koefisien dari setiap peubah bebas terhadap peubah respons (Price). Dapat terlihat bahwa Peubah x10 merupakan peubah dengan koefisien positif tertinggi yang berpengaruh terhadap model. Sehingga dapat disimpulkan bahwa peubah ini merupakan peubah yang paling berpengaruh terhadap peubah respons jika dibandingkan peubah bebas lainnya.
Metode regresi Lasso hanya membuang peubah penjelas x1, x3, x9, dan x13 sehingga persamaan regresinya yaitu \[y = 25.534175439 - 0.207182814x_{2} + 0.127242013x_{4} - 0.001565343x_{5} + 0.002288748x_{6} + 0.209215008x_{7} - 0.126593161x_{8} + 0.603912405x_{10} - 1.082229618x_{11} - 0.414800762x_{12} + 0.014194373x_{14} + 0.003439394x_{15}\]
# R-squared of the lasso model on the same data it was fitted to:
# 1 - (sum of squared prediction errors) / (total sum of squares of y)
r_squared_lasso <- 1 - sum((matrix_Y - predictions_lasso)^2) / sum((matrix_Y - mean(matrix_Y))^2)
r_squared_lasso
## [1] 0.7350772
Melalui regresi Lasso terlihat bahwa peubah x1, x3, x9, dan x13 dihapus dari model. R-Square yang diperoleh dengan regresi lasso adalah R-squared: 0.7350772
Jika dibandingkan dari ketiga model, berdasarkan nilai R-Square, model terbaik adalah menggunakan regresi Lasso dengan R−squared: 0.7350772
Model regresi Lasso yang diperoleh adalah \[y = 25.534175439 - 0.207182814x_{2} + 0.127242013x_{4} - 0.001565343x_{5} + 0.002288748x_{6} + 0.209215008x_{7} - 0.126593161x_{8} + 0.603912405x_{10} - 1.082229618x_{11} - 0.414800762x_{12} + 0.014194373x_{14} + 0.003439394x_{15}\]
Model menjelaskan bahwa setiap penambahan satu satuan pada salah satu peubah x2, x4, x5, x6, x7, x8, x10, x11, x12, x14, x15, dengan peubah lain dianggap tetap, akan mengubah peubah respons sebesar koefisien peubah tersebut: meningkat jika koefisiennya positif dan menurun jika koefisiennya negatif.